HPC tutorial heading HPC tutorial heading
  •                          
 
NCAR

 

Example 5 - shallow5.f - Code parallelized using OpenMP threading library. Shared memory parallelization.

Download shallow5.f code.

1.	      PROGRAM SHALOW
2.	*     In this version, shallow5.f, initial and calculated values
3.	*     of U, V, and P are written to a netCDF file
4.	*     for later use in visualizing the results. The netCDF data
5.	*     management library is freely available from
6.	*     http://www.unidata.ucar.edu/software/netcdf
7.	*     OpenMP parallel dos have been added to the major worker
8.	*     loops and a new loop added to demonstrate further work
9.	*     The original code has been brought up to more modern
10.	*     Fortran constructs and uses portable intrinsic Fortran 90 timing routines
11.	*     This can be compiled on the IBM SP using:
12.	*     xlf90_r -qmaxmem=-1 -g -o shallow5 -qsmp=omp -qfixed=132 -qsclk=micro \
13.	*     -I/usr/local/include shallow5.f -L/usr/local/lib32/r4i4 -l netcdf
14.	*     where the -L and -I point to local installation of netCDF and
15.	*     -qsmp=omp is XLF's option for OpenMP threading
16.	      
17.	      include 'netcdf.inc'
18.	C     
19.	C     BENCHMARK WEATHER PREDICTION PROGRAM FOR COMPARING THE
20.	C     PREFORMANCE OF CURRENT SUPERCOMPUTERS. THE MODEL IS
21.	C     BASED OF THE PAPER - THE DYNAMICS OF FINITE-DIFFERENCE
22.	C     MODELS OF THE SHALLOW-WATER EQUATIONS, BY ROBERT SADOURNY
23.	C     J. ATM. SCIENCES, VOL 32, NO 4, APRIL 1975.
24.	C     
25.	C     CODE BY PAUL N. SWARZTRAUBER, NATIONAL CENTER FOR
26.	C     ATMOSPHERIC RESEARCH, BOULDER, CO,  OCTOBER 1984.
27.	C     Modified by Juliana Rew, NCAR, January 2006
28.	      
29.	      integer mlen,nlen
30.	      
31.	      parameter (m_len = 65)
32.	      parameter (n_len = 65)
33.	      integer M,N
34.	      parameter (M = 64)
35.	      parameter (N = 64)
36.	      DIMENSION U(m_len,n_len),V(m_len,n_len),P(m_len,n_len),UNEW(m_len,n_len),VNEW(m_len,n_len),
37.	     1     PNEW(m_len,n_len),UOLD(m_len,n_len),VOLD(m_len,n_len),POLD(m_len,n_len),
38.	     2     CU(m_len,n_len),CV(m_len,n_len),Z(m_len,n_len),H(m_len,n_len),PSI(m_len,n_len)
39.	      
40.	*     REAL MFS100,MFS200,MFS300
41.	      double precision MFS100,MFS200,MFS300
42.	      double precision T100,T200,T300,TSTART,CTIME,TCYC
43.	      integer c1,c2,r,max
44.	      integer ncid,p_id,u_id,v_id
45.	      integer istart(3)
46.	      integer icount(3)
47.	*     prepare netCDF file to receive model output data
48.	      call netcdf_setup(m_len,n_len,ncid,p_id,u_id,v_id,istart,icount)
49.	C     
50.	C     NOTE BELOW THAT TWO DELTA T (TDT) IS SET TO DT ON THE FIRST
51.	C     CYCLE AFTER WHICH IT IS RESET TO DT+DT.
52.	C     
53.	      
54.	      DT = 90.
55.	      TDT = DT
56.	C     
57.	      DX = 1.E5
58.	      DY = 1.E5
59.	      A = 1.E6
60.	      ALPHA = .001
61.	      ITMAX = 4000
62.	      MPRINT = 100
63.	*     could parameterize M and N above
64.	*     to change size of problem easier to change in one spot
65.	*     M = 64
66.	*     N = 64
67.	      MP1 = M+1
68.	      NP1 = N+1
69.	      EL = FLOAT(N)*DX
70.	      PI = 4.*ATAN(1.)
71.	      TPI = PI+PI
72.	      DI = TPI/FLOAT(M)
73.	      DJ = TPI/FLOAT(N)
74.	      PCF = PI*PI*A*A/(EL*EL)
75.	C     
76.	C     INITIAL VALUES OF THE STREAM FUNCTION AND P
77.	C     
78.	      DO J=1,NP1
79.	         DO I=1,MP1
80.	            PSI(I,J) = A*SIN((FLOAT(I)-.5)*DI)*SIN((FLOAT(J)-.5)*DJ)
81.	*     PSI(I,J) = 0
82.	            P(I,J) = PCF*(COS(2.*FLOAT(I-1)*DI)
83.	     1           +COS(2.*FLOAT(J-1)*DJ))+50000.
84.	         END DO
85.	 50   END DO
86.	C     
87.	C     INITIALIZE VELOCITIES
88.	C     
89.	      DO J=1,N
90.	         DO I=1,M
91.	            U(I+1,J) = -(PSI(I+1,J+1)-PSI(I+1,J))/DY
92.	            V(I,J+1) = (PSI(I+1,J+1)-PSI(I,J+1))/DX
93.	         END DO
94.	 60   END DO
95.	C     
96.	C     PERIODIC CONTINUATION
97.	C     
98.	      DO J=1,N
99.	         U(1,J) = U(M+1,J)
100.	         V(M+1,J+1) = V(1,J+1)
101.	 70   END DO
102.	      DO I=1,M
103.	         U(I+1,N+1) = U(I+1,1)
104.	         V(I,1) = V(I,N+1)
105.	 75   END DO
106.	      U(1,N+1) = U(M+1,1)
107.	      V(M+1,1) = V(1,N+1)
108.	      DO J=1,NP1
109.	         DO I=1,MP1
110.	            UOLD(I,J) = U(I,J)
111.	            VOLD(I,J) = V(I,J)
112.	            POLD(I,J) = P(I,J)
113.	         END DO
114.	 86   END DO
115.	C     
116.	C     Initialize CAPITAL  U, CAPITAL V, Z AND H
117.	C     
118.	      FSDX = 4./DX
119.	      FSDY = 4./DY
120	*     T100 = SECOND(DUM)
121.	      call system_clock(count=c1, count_rate=r,count_max=max)
122.	      T100 = c1
123.	      DO J=1,N
124.	         DO I=1,M
125.	            CU(I+1,J) = .5*(P(I+1,J)+P(I,J))*U(I+1,J)
126.	            CV(I,J+1) = .5*(P(I,J+1)+P(I,J))*V(I,J+1)
127.	            Z(I+1,J+1) =(FSDX*(V(I+1,J+1)-V(I,J+1))-FSDY*(U(I+1,J+1)
128.	     1           -U(I+1,J)))/(P(I,J)+P(I+1,J)+P(I+1,J+1)+P(I,J+1))
129.	            H(I,J) = P(I,J)+.25*(U(I+1,J)*U(I+1,J)+U(I,J)*U(I,J)
130.	     1           +V(I,J+1)*V(I,J+1)+V(I,J)*V(I,J))
131.	         END DO
132.	 100  END DO
133.	*     T100 = SECOND(DUM)-T100
134.	      call system_clock(count=c2,count_rate=r,count_max=max)
135.	      T100 = dble(c2-T100)/dble(r)
136.	C     
137.	C     PERIODIC CONTINUATION
138.	C     
139.	      DO J=1,N
140.	         CU(1,J) = CU(M+1,J)
141.	         CV(M+1,J+1) = CV(1,J+1)
142.	         Z(1,J+1) = Z(M+1,J+1)
143.	         H(M+1,J) = H(1,J)
144.	 110  END DO
145.	      DO I=1,M
146.	         CU(I+1,N+1) = CU(I+1,1)
147.	         CV(I,1) = CV(I,N+1)
148.	         Z(I+1,1) = Z(I+1,N+1)
149.	         H(I,N+1) = H(I,1)
150.	 115  END DO
151.	      CU(1,N+1) = CU(M+1,1)
152.	      CV(M+1,1) = CV(1,N+1)
153.	      Z(1,1) = Z(M+1,N+1)
154.	      H(M+1,N+1) = H(1,1)
155.	      
156.	C     
157.	C     PRINT INITIAL VALUES
158.	C     
159.	      WRITE(6,390) N,M,DX,DY,DT,ALPHA
160.	 390  FORMAT("1NUMBER OF POINTS IN THE X DIRECTION",I8,/
161.	     1     " NUMBER OF POINTS IN THE Y DIRECTION",I8,/
162.	     2     " GRID SPACING IN THE X DIRECTION    ",F8.0,/
163.	     3     " GRID SPACING IN THE Y DIRECTION    ",F8.0,/
164.	     4     " TIME STEP                          ",F8.0,/
165.	     5     " TIME FILTER PARAMETER              ",F8.3)
166.	      MNMIN = MIN0(M,N)
167.	      WRITE(6,391) (P(I,I),I=1,MNMIN)
168.	 391  FORMAT(/' INITIAL DIAGONAL ELEMENTS OF P ' //,(8E15.6))
169.	      WRITE(6,392) (U(I,I),I=1,MNMIN)
170.	 392  FORMAT(/' INITIAL DIAGONAL ELEMENTS OF U ' //,(8E15.6))
171.	      WRITE(6,393) (V(I,I),I=1,MNMIN)
172.	 393  FORMAT(/' INITIAL DIAGONAL ELEMENTS OF V ' //,(8E15.6))
173.	      write(6,394) (H(I,I),I=1,MNMIN)
174.	 394  Format(/' INITIAL DIAGONAL ELEMENTS OF H ' //,(8E15.6))
175.	      write(6,395) (Z(I,I),I=1,MNMIN)
176.	 395  Format(/' INITIAL DIAGONAL ELEMENTS OF Z ' //,(8E15.6))
177.	*     write intial values of p, u, and v into a netCDF file
178.	      
179.	      call my_ncwrite(ncid,p_id,istart,icount,p,m_len,n_len)
180.	      call my_ncwrite(ncid,u_id,istart,icount,u,m_len,n_len)
181.	      call my_ncwrite(ncid,v_id,istart,icount,v,m_len,n_len)
182.	*     TSTART = SECOND(DUM)
183.	      call system_clock (count=c1, count_rate=r, count_max=max)
184.	      TSTART = c1
185.	      T300 = 1.
186.	      TIME = 0.
187.	      NCYCLE = 0
188.	*     90 NCYCLE = NCYCLE + 1
189.	      DO ncycle=1,itmax
190.	C     
191.	C     COMPUTE NEW VALUES U,V AND P
192.	C     
193.	         TDTS8 = TDT/8.
194.	         TDTSDX = TDT/DX
195.	         TDTSDY = TDT/DY
196.	*     T200 = SECOND(DUM)
197.	         call system_clock(count=c1, count_rate=r, count_max=max)
198.	         T200 = c1
199.	!$OMP PARALLEL DO private (I,J)
200.	         DO J=1,N
201.	            
202.	            DO I=1,M
203.	               UNEW(I+1,J) = UOLD(I+1,J)+
204.	     1              TDTS8*(Z(I+1,J+1)+Z(I+1,J))*(CV(I+1,J+1)+CV(I,J+1)+CV(I,J)
205.	     2              +CV(I+1,J))-TDTSDX*(H(I+1,J)-H(I,J))
206.	               VNEW(I,J+1) = VOLD(I,J+1)-TDTS8*(Z(I+1,J+1)+Z(I,J+1))
207.	     1              *(CU(I+1,J+1)+CU(I,J+1)+CU(I,J)+CU(I+1,J))
208.	     2              -TDTSDY*(H(I,J+1)-H(I,J))
209.	               PNEW(I,J) = POLD(I,J)-TDTSDX*(CU(I+1,J)-CU(I,J))
210.	     1              -TDTSDY*(CV(I,J+1)-CV(I,J))
211.	            END DO
212.	 200     END DO
213.	*     T200 = SECOND(DUM)-T200
214.	         call system_clock(count=c2, count_rate=r, count_max=max)
215.	         T200 = dble(c2 -T200)/dble(r)
216.	C     
217.	C     PERIODIC CONTINUATION
218.	C     
219.	         DO J=1,N
220.	            UNEW(1,J) = UNEW(M+1,J)
221.	            VNEW(M+1,J+1) = VNEW(1,J+1)
222.	            PNEW(M+1,J) = PNEW(1,J)
223.	 210     END DO
224.	         DO I=1,M
225.	            UNEW(I+1,N+1) = UNEW(I+1,1)
226.	            VNEW(I,1) = VNEW(I,N+1)
227.	            PNEW(I,N+1) = PNEW(I,1)
228.	 215     END DO
229.	         UNEW(1,N+1) = UNEW(M+1,1)
230.	         VNEW(M+1,1) = VNEW(1,N+1)
231.	         PNEW(M+1,N+1) = PNEW(1,1)
232.	*     jr      IF(NCYCLE .GT. ITMAX) CALL EXIT
233.	         
234.	         TIME = TIME + DT
235.	*     jr      IF(MOD(NCYCLE,MPRINT) .NE. 0) GO TO 370
236.	         IF(MOD(NCYCLE,MPRINT) .EQ. 0) then
237.	*     jr  370 IF(NCYCLE .LE. 1) GO TO 310
238.	            
239.	            PTIME = TIME/3600.
240.	            WRITE(6,350) NCYCLE,PTIME
241.	 350        FORMAT(//' CYCLE NUMBER',I5,' MODEL TIME IN  HOURS', F6.2)
242.	            WRITE(6,355) (PNEW(I,I),I=1,MNMIN)
243.	 355        FORMAT(/' DIAGONAL ELEMENTS OF P ' //(8E15.6))
244.	            WRITE(6,360) (UNEW(I,I),I=1,MNMIN)
245.	 360        FORMAT(/' DIAGONAL ELEMENTS OF U ' //(8E15.6))
246.	            WRITE(6,365) (VNEW(I,I),I=1,MNMIN)
247.	 365        FORMAT(/' DIAGONAL ELEMENTS OF V ' //(8E15.6))
248.	*     jr added MFS310--don't know what actual mult factor should be
249.	*     jr changed divide by 1 million to divide by 100K since system_clock
250.	*     jr resolution is millisec rather than cpu_time's 10 millisec
251.	            MFS310 = 24.*M*N/T310/1.D5
252.	*     MFS100 = 24.*M*N/T100/1.E6
253.	            MFS100 = 24.*M*N/T100/1.D5
254.	*     MFS200 = 26.*M*N/T200/1.E6
255.	            MFS200 = 26.*M*N/T200/1.D5
256.	*     MFS300 = 15.*M*N/T300/1.E6
257.	            MFS300 = 15.*M*N/T300/1.D5
258.	*     CTIME = SECOND(DUM)-TSTART
259.	            call system_clock(count=c2, count_rate=r,count_max=max)
260.	            CTIME = dble(c2 - TSTART)/dble(r)
261.	            TCYC = CTIME/FLOAT(NCYCLE)
262.	            WRITE(6,375) NCYCLE,CTIME,TCYC,T310,MFS310,T200,MFS200,T300,MFS300
263.	*     375  FORMAT(' CYCLE NUMBER',I5,' TOTAL COMPUTER TIME', E15.6,
264.	*     1     ' TIME PER CYCLE', E15.6, /
265.	*     2     ' TIME AND MEGAFLOPS FOR LOOP 310 ', E15.6,F6.1/
266.	*     3     ' TIME AND MEGAFLOPS FOR LOOP 200 ', E15.6,F6.1/
267.	*     4     ' TIME AND MEGAFLOPS FOR LOOP 300 ', E15.6,F6.1/ )
268.	 375        FORMAT(' CYCLE NUMBER',I5,' TOTAL COMPUTER TIME', D15.6,
269.	     1           ' TIME PER CYCLE', D15.6, /
270.	     2           ' TIME AND MEGAFLOPS FOR LOOP 310 ', D15.6,2x,D6.1/
271.	     3           ' TIME AND MEGAFLOPS FOR LOOP 200 ', D15.6,2x,D6.1/
272.	     4           ' TIME AND MEGAFLOPS FOR LOOP 300 ', D15.6,2x,D6.1/ )
273.	*     append calculated values of p, u, and v to netCDF file
274.	*     istart(3) = ncycle + 1
275.	            istart(3) = istart(3) + 1
276.	*     shape of record to be written (one ncycle at a time)
277.	            call my_ncwrite(ncid,p_id,istart,icount,p,m_len,n_len)
278.	            call my_ncwrite(ncid,u_id,istart,icount,u,m_len,n_len)
279.	            call my_ncwrite(ncid,v_id,istart,icount,v,m_len,n_len)
280.	         endif
281.	C     
282.	C     TIME SMOOTHING AND UPDATE FOR NEXT CYCLE
283.	C     
284.	*     370 IF(NCYCLE .LE. 1) GO TO 310
285.	 370     IF(NCYCLE .GT. 1) then
286.	*     T300 = SECOND(DUM)
287.	            call system_clock(count=c1,count_rate=r,count_max=max)
288.	            T300 = c1
289.	!     $OMP PARALLEL DO private (i,j)
290.	            DO J=1,N
291.	               
292.	               DO I=1,M
293.	                  UOLD(I,J) = U(I,J)+ALPHA*(UNEW(I,J)-2.*U(I,J)+UOLD(I,J))
294.	                  VOLD(I,J) = V(I,J)+ALPHA*(VNEW(I,J)-2.*V(I,J)+VOLD(I,J))
295.	                  POLD(I,J) = P(I,J)+ALPHA*(PNEW(I,J)-2.*P(I,J)+POLD(I,J))
296.	                  U(I,J) = UNEW(I,J)
297.	                  V(I,J) = VNEW(I,J)
298.	                  P(I,J) = PNEW(I,J)
299.	               END DO
300.	 300        END DO
301.	*     T300 = SECOND(DUM)-T300
302.	            call system_clock(count=c2,count_rate=r, count_max=max)
303.	            T300 = dble(c2 - T300)/dble(r)
304.	C     
305.	C     PERIODIC CONTINUATION
306.	C     
307.	            DO J=1,N
308.	               UOLD(M+1,J) = UOLD(1,J)
309.	               VOLD(M+1,J) = VOLD(1,J)
310.	               POLD(M+1,J) = POLD(1,J)
311.	               U(M+1,J) = U(1,J)
312.	               V(M+1,J) = V(1,J)
313.	               P(M+1,J) = P(1,J)
314.	 320        END DO
315.	            DO I=1,M
316.	               UOLD(I,N+1) = UOLD(I,1)
317.	               VOLD(I,N+1) = VOLD(I,1)
318.	               POLD(I,N+1) = POLD(I,1)
319.	               U(I,N+1) = U(I,1)
320.	               V(I,N+1) = V(I,1)
321.	               P(I,N+1) = P(I,1)
322.	 325        END DO
323.	            UOLD(M+1,N+1) = UOLD(1,1)
324.	            VOLD(M+1,N+1) = VOLD(1,1)
325.	            POLD(M+1,N+1) = POLD(1,1)
326.	            U(M+1,N+1) = U(1,1)
327.	            V(M+1,N+1) = V(1,1)
328.	            P(M+1,N+1) = P(1,1)
329.	C     
330.	C     COMPUTE  NEW CU & CV & Z & H
331.	C     
332.	!     $OMP PARALLEL DO private(i,j)
333.	            DO J=1,N
334.	               
335.	               DO I=1,M
336.	                  CU(I+1,J) = .5*(P(I+1,J)+P(I,J))*U(I+1,J)
337.	                  CV(I,J+1) = .5*(P(I,J+1)+P(I,J))*V(I,J+1)
338.	                  Z(I+1,J+1) = (FSDX*(V(I+1,J+1)-V(I,J+1))-FSDY*(U(I+1,J+1)
339.	     1                 -U(I+1,J)))/(P(I,J)+P(I+1,J)+P(I+1,J+1)+P(I,J+1))
340.	                  H(I,J) = P(I,J)+.25*(U(I+1,J)*U(I+1,J)+U(I,J)*U(I,J)
341.	     1                 +V(I,J+1)*V(I,J+1)+V(I,J)*V(I,J))
342.	               END DO
343.	 510        END DO
344.	C     
345.	C     PERIODIC CONTINUATION of CU & CV &Z & H
346.	C     
347.	            DO J=1,N
348.	               CU(1,J) = CU(M+1,J)
349.	               CV(M+1,J+1) = CV(1,J+1)
350.	               Z(1,J+1) = Z(M+1,J+1)
351.	               H(M+1,J) = H(1,J)
352.	 511        END DO
353.	            DO I=1,M
354.	               CU(I+1,N+1) = CU(I+1,1)
355.	               CV(I,1) = CV(I,N+1)
356.	               Z(I+1,1) = Z(I+1,N+1)
357.	               H(I,N+1) = H(I,1)
358.	 512        END DO
359.	            CU(1,N+1) = CU(M+1,1)
360.	            CV(M+1,1) = CV(1,N+1)
361.	            Z(1,1) = Z(M+1,N+1)
362.	            H(M+1,N+1) = H(1,1)
363.	            
364.	         else
365.	 310        TDT = TDT+TDT
366.	            call system_clock(count=c1, count_rate=r,count_max=max)
367.	            T310 = c1
368.	            DO J=1,NP1
369.	               DO I=1,MP1
370.	                  UOLD(I,J) = U(I,J)
371.	                  VOLD(I,J) = V(I,J)
372.	                  POLD(I,J) = P(I,J)
373.	                  U(I,J) = UNEW(I,J)
374.	                  V(I,J) = VNEW(I,J)
375.	                  P(I,J) = PNEW(I,J)
376.	               END DO
377.	 400        END DO
378.	            call system_clock(count=c2, count_rate=r, count_max=max)
379.	            T310 = dble(c2 - T310)/dble(r)
380.	         endif
381.	         
382.	*     end big loop 90
383.	      End do
384.	*     close the netCDF file
385.	      iret = nf_close(ncid)
386.	      call check_err(iret)
387.	      END
388.	      
389.	*     function second(dum)
390.	*     real time
391.	*     call cpu_time(time)
392.	*     second=time
393.	*     end
394.	      
395.	      subroutine check_err(iret)
396.	      integer iret
397.	      include 'netcdf.inc'
398.	      if(iret .ne. NF_NOERR) then
399.	         print *, nf_strerror(iret)
400.	         stop
401.	      endif
402.	      end
403.	      
404.	      subroutine netcdf_setup(m_len,n_len,ncid,p_id,u_id,v_id,istart,icount)
405.	*     Input args: m_len, n_len
406.	*     Output args: ncid, p_id,u_id,v_id,istart,icount)
407.	      integer m_len,n_len
408.	*     declarations for netCDF library
409.	      include 'netcdf.inc'
410.	*     error status return
411.	      integer iret
412.	*     netCDF id
413.	      integer ncid
414.	*     dimension ids
415.	      integer m_dim
416.	      integer n_dim
417.	      integer time_dim      
418.	*     variable ids
419.	      integer p_id
420.	      integer u_id
421.	      integer v_id
422.	*     rank (number of dimensions) for each variable
423.	      integer p_rank, u_rank, v_rank
424.	      parameter (p_rank = 3)
425.	      parameter (u_rank = 3)
426.	      parameter (v_rank = 3)
427.	*     variable shapes
428.	      integer p_dims(p_rank)
429.	      integer u_dims(u_rank)
430.	      integer v_dims(v_rank)
431.	      integer istart(p_rank)
432.	      integer icount(p_rank)
433.	      
434.	*     enter define mode
435.	      iret = nf_create('shallowdat.nc', NF_CLOBBER,
436.	     +     ncid)
437.	      call check_err(iret)
438.	*     define dimensions
439.	      iret = nf_def_dim(ncid, 'm', m_len, m_dim)
440.	      call check_err(iret)
441.	      iret = nf_def_dim(ncid, 'n', n_len, n_dim)
442.	      call check_err(iret)
443.	      iret = nf_def_dim(ncid, 'time', NF_UNLIMITED, time_dim)
444.	      call check_err(iret)
445.	*     define variables
446.	      p_dims(1) = m_dim
447.	      p_dims(2) = n_dim
448.	      p_dims(3) = time_dim
449.	      iret = nf_def_var(ncid, 'p', NF_REAL, p_rank, p_dims, p_id)
450.	      call check_err(iret)
451.	      u_dims(1) = m_dim
452.	      u_dims(2) = n_dim
453.	      u_dims(3) = time_dim
454.	      iret = nf_def_var(ncid, 'u', NF_REAL, u_rank, u_dims, u_id)
455.	      call check_err(iret)
456.	      v_dims(1) = m_dim
457.	      v_dims(2) = n_dim
458.	      v_dims(3) = time_dim
459.	      iret = nf_def_var(ncid, 'v', NF_REAL, v_rank, v_dims, v_id)
460.	      call check_err(iret)
461.	*     start netCDF write at the first (1,1,1) position of the array
462.	      istart(1) = 1
463.	      istart(2) = 1
464.	      istart(3) = 1
465.	*     shape of record to be written (one ncycle at a time)
466.	      icount(1) = m_len
467.	      icount(2) = n_len
468.	      icount(3) = 1
469.	      
470.	*     leave define mode
471.	      iret = nf_enddef(ncid)
472.	      call check_err(iret)
473.	      
474.	*     end of netCDF definitions
475.	      end subroutine netcdf_setup
476.	      
477.	      subroutine my_ncwrite(id,varid,istart,icount,var,m_len,n_len)
478.	*     Input args: id, varid,istart,icount,var,m_len_n_len
479.	*     Write a whole array out to the netCDF file
480.	      integer id,varid,iret
481.	      integer icount(3)
482.	      integer istart(3)
483.	      real var (m_len,n_len)
484.	      iret = nf_put_vara_real(id,varid,istart,icount,var)
485.	      call check_err(iret)
486.	      
487.	      end subroutine my_ncwrite

Line 15: OpenMP is a standard, portable programming interface that allows work to be split into "threads" that compute in parallel. It is generally most useful in the context of "shared memory" (SPMD) parallel programming.

For example, on a shared-memory architecture such as the SGI Origin 3400, OpenMP is very efficient. An architecture such as the IBM SP is organized into "nodes," each containing 4-16 processors. OpenMP is efficient on this architecture as long as the work stays on the node, where it shares memory with the other processors. With an architecture such as a Linux cluster, nodes typically consist or only one or two processors. OpenMP is less useful on these architectures.

Line 199: Note that we are parallelizing only the outer (J) loop, which is doing the bulk of the work.

If you have questions about this document, please contact SCD Customer Support. You can also reach us by telephone 24 hours a day, seven days a week at 303-497-1278. Additional contact methods: consult1@ucar.edu and during business hours in NCAR Mesa Lab Suite 39.

© Copyright 2006. University Corporation for Atmospheric Research (UCAR). All Rights Reserved.