lightning user document heading  
NCAR
Last update: 02/11/2005

Lightning user doc contents

Threaded job example (simple parallelism, no message passing)

This example uses threads to sum a set of 1,000,000 exponentials. These threads comply with the OpenMP standard. OpenMP is a portable, scalable Application Program Interface (API) that gives shared-memory parallel programmers a simple and flexible interface for developing parallel applications for all architectures.

Note the OpenMP work-sharing loop directive in each code (!$omp do in Fortran, #pragma omp for in C and C++), which divides the iterations of the loop among the parallel threads.

This example consists of four parts:

  • A script for the LSF batch subsystem that submits the job to lightning
  • Fortran code that runs the example
  • C code that runs the same example
  • C++ code that runs the same example

To run this example, you have two choices:

  • From your working directory on lightning, copy the example files out of /usr/local/examples/lsf/batch/:
    cp /usr/local/examples/lsf/batch/omp.* $PWD
    Submit the example codes to the LSF batch subsystem by entering:
    bsub < omp.lsf

  • Copy the codes on this page and paste them into your own files:

    1. Copy the LSF batch job script below and paste it into a file named omp.lsf in your working directory on lightning.
    2. Copy the Fortran code below and paste it into a file named omp.f in your working directory on lightning.
    3. Copy the C code below and paste it into a file named omp.c in your working directory on lightning.
    4. Copy the C++ code below and paste it into a file named omp.cc in your working directory on lightning.
    5. Submit the example codes to the LSF batch subsystem by entering:
      bsub < omp.lsf

Studying this example will help you prepare your own threaded jobs for submittal to lightning via LSF.

LSF batch job script to submit the threaded job

#!/bin/ksh
#
# LSF script to run an OMP code
#
# Builds the Fortran, C and C++ versions of the threaded example in turn,
# runs each one with 1 and then 2 OpenMP threads, and removes the
# executable it just built before moving on to the next language.
#
#BSUB -a openmp                         # select openmp esub
#BSUB -x                                # exclusive use of node
#BSUB -n 4                              # number of tasks
#BSUB -R "span[ptile=2]"                # run two tasks per host
#BSUB -o omplsf.out                     # output filename
#BSUB -e omplsf.err                     # error filename
#BSUB -J omplsf.test                    # job name
#BSUB -q regular                        # queue

# Fortran example
# -Mextend allows source lines longer than 72 columns; -mp enables OpenMP.
pgf90 -o ompf -Mextend -mp omp.f
export OMP_NUM_THREADS=1
./ompf
echo ''
export OMP_NUM_THREADS=2
./ompf
echo ''
# Cases 3 & 4 commented out since lightning max threads/node = 2
#export OMP_NUM_THREADS=3
#./ompf
#echo ''
#export OMP_NUM_THREADS=4
#./ompf
rm ompf
echo ""

# C example
pgcc -mp -o ompc omp.c
echo ''
export OMP_NUM_THREADS=1
./ompc
echo ''
export OMP_NUM_THREADS=2
./ompc
echo ''
# Cases 3 & 4 commented out since lightning max threads/node = 2
#export OMP_NUM_THREADS=3
#./ompc
#echo ''
#export OMP_NUM_THREADS=4
#./ompc
rm ompc
echo ""

# C++ example
pgCC --no_auto_instantiation -mp -o ompcc omp.cc
export OMP_NUM_THREADS=1
./ompcc
echo ''
export OMP_NUM_THREADS=2
./ompcc
echo ''
# Cases 3 & 4 commented out since lightning max threads/node = 2
#export OMP_NUM_THREADS=3
#./ompcc
#echo ''
#export OMP_NUM_THREADS=4
#./ompcc
# Remove the executable built above (it lives in the working directory).
rm ompcc
echo ""

Threaded job example code in Fortran

      program main
!     Sum exp(1.0d-8*i) for i = 1..1,000,000 with an OpenMP work-shared
!     loop, then print the sum and the wall-clock time, in seconds, that
!     the parallel region took.
!
!     use omp_lib          # pgf90 does not support openmp 2.0
!                          # so this use must be commented out
      implicit none
      external omp_get_thread_num   ! use this kludge for pgi instead of 'use omp_lib'
      integer  omp_get_thread_num
      integer i
      integer hz, clock0, clock1
      real(kind=8):: elapsed, sum

      sum=0.0d0
!     hz is the number of clock ticks per second.
      call system_clock(count_rate = hz)
      call system_clock(count = clock0)
!$omp parallel
!     Let one thread print at a time so that lines from different
!     threads cannot interleave (the C and C++ versions do the same).
!$omp critical
      print 12, omp_get_thread_num()
!$omp end critical
!$omp do reduction(+:sum)
      do i=1,1000000
!        The double-precision literal keeps exp() in double precision;
!        a default-real argument would evaluate it in single precision.
         sum=sum+exp(1.0d-8*i)
      end do
!$omp end do
!$omp end parallel
      call system_clock(count = clock1)
!     Ticks divided by ticks-per-second is already a time in seconds.
      elapsed = real(clock1 - clock0, kind=8) / hz
      print 10, sum, elapsed
12    format(' OMP thread number ',i4)
10    format('  xlf OMP Results: Sum = ',1pe12.6,' Loop time = ',0pf12.8)
      stop
      end

Threaded job example code in C

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
/*
 * Sum exp(1.0e-8 * i) for i = 1 .. 1,000,000 with an OpenMP work-shared
 * loop, then print the sum and the wall-clock time of the parallel region.
 * Returns 0 to the shell.
 */
int main(void)
{
        int i;
        double sum=0.0, elapsed;
        double rtc(void);       /* wall-clock timer defined later in this file */

        elapsed=rtc();

#pragma omp parallel
        {
                /* One thread at a time prints, so lines do not interleave. */
#pragma omp critical
                printf("OMP thread number %4d\n", omp_get_thread_num());

                /* Iterations are divided among the threads; each thread's
                   partial sum is combined into sum when the loop ends. */
#pragma omp for reduction (+:sum)
                for(i=1; i<=1000000; i++)
                {
                        sum += exp( .00000001 * (double)i );
                }
        }
        elapsed=rtc()-elapsed;
        printf( "   c OMP Results: Sum = %11e Loop time =  %8f  \n",sum,elapsed );
        return 0;
}
/*
 * Return the current wall-clock time in seconds, including the
 * microsecond fraction reported by gettimeofday().
 */
double rtc(void)
{
        struct timeval now;     /* not named "time", which would shadow time() */

        gettimeofday(&now,NULL);
        return ( (double)(now.tv_sec)+(double)(now.tv_usec)/1000000.0 );
}

Threaded job example code in C++

#include <iostream>
#include <string>
#include <math.h>
#include <stdlib.h>
#include <sys/time.h>
#include <omp.h>
using namespace std;

//---------Class Defs--------------------------------------------------
// Runs the threaded summation of exponentials and remembers how long it took.
class exp_sum {
private:
        double summer;          // total accumulated by sum()
        struct timeval time;    // buffer that rtc() hands to gettimeofday()
        double rtc();           // current wall-clock time in seconds
public:
        double elapsed;         // wall-clock seconds measured by sum()
        double sum();           // perform the summation and return the total
};

// Sum exp(1.0e-8 * i) for i = 1 .. 1,000,000 with an OpenMP work-shared loop.
// Stores the total in summer, stores the wall-clock seconds the computation
// took in elapsed, and returns the total.
double exp_sum::sum()
{
        int i;
        // Kludge for pgCC: the loop reduces into a local instead of the data
        // member summer.  The local is declared OUTSIDE the parallel region so
        // that it is shared, which a reduction variable on a work-sharing loop
        // must be; the combined result is copied to the member once the
        // region has finished.  (On AIX the member itself could be named in
        // the reduction clause.)
        double total = 0.0;

        elapsed=rtc();
#pragma omp parallel
        {
                // One thread at a time prints, so lines do not interleave.
#pragma omp critical
                cout << "OMP thread number " << omp_get_thread_num()  << endl;

#pragma omp for reduction (+: total)
                for(i=1; i<=1000000; i++)
                {
                        total += exp( .00000001 * (double)i );
                }
        }
        summer = total;
        elapsed=rtc()-elapsed;
        return summer;
}

// Wall-clock time in seconds, with the microsecond part as the fraction.
double exp_sum::rtc()
{
        gettimeofday(&time, NULL);

        const double whole_seconds = (double)(time.tv_sec);
        const double micro_seconds = (double)(time.tv_usec);
        return ( whole_seconds + micro_seconds/1000000.0 );
}

//-----------Main Program---------------------------------------------
// Run the threaded summation once and report the sum and the loop time.
int main()
{
        exp_sum total;
        const double value = total.sum();

        cout << scientific;
        cout << "xlcc OMP Results: Sum = " << value;
        cout << " Loop time  = " << total.elapsed << endl;
        return 0;
}

Next page | IBM Linux cluster systems fundamentals - Table of contents

If you have questions about this document, please contact SCD Customer Support. You can also reach us by telephone 24 hours a day, seven days a week at 303-497-1278. Additional contact methods: consult1@ucar.edu and during business hours in NCAR Mesa Lab Suite 39.

© Copyright 2005. University Corporation for Atmospheric Research (UCAR). All Rights Reserved.

Address of this page: http://www.scd.ucar.edu/docs/lightning/examples/threaded.jsp