|
Lightning user doc contents
|
Hybrid parallel job example
This example uses both MPI SPMD parallel processes and OpenMP threads
to sum a set of 1,000,000 exponentials. A "hybrid" job combines two types
of parallel computing tasks: SPMD (Single Processor Multiple Data
structures) or MPMD (Multiple Processors Multiple Data structures)
parallel processes where one or more processes use threads. The
Message Passing Interface is a standardized programming paradigm
for parallel computers designed to allow separate processes to
efficiently communicate with each other via messages.
This example consists of four parts:
- A script for the LSF batch subsystem that submits the job
to lightning
- Fortran code that runs the example
- C code that runs the same example
- C++ code that runs the same example
To run this example, you have two choices:
In the /usr/local/examples/lsf/batch/ directory on lightning:
cp mix.* $PWD
Submit the example codes to the LSF batch subsystem by entering:
bsub < mix.lsf
Copy the codes on this page and paste them into your own
files:
- Copy the
LSF batch job script below and paste it into
a file named mix.lsf in your working directory on
lightning.
- Copy the
Fortran code below and paste it into a file named
mix.f in your working directory on lightning.
- Copy the
C code below and paste it into a file named
mix.c in your working directory on lightning.
- Copy the
C++ code below and paste it into a file named
mix.cc in your working directory on lightning.
- Submit the example codes to the LSF batch subsystem by
entering:
bsub < mix.lsf
Studying this example will help you prepare your own hybrid jobs
for submittal to lightning via LSF.
#!/bin/ksh
#
# LSF batch script to run the test mixed MPI/OMP codes
#
#BSUB -a mpich_gm # select mpich_gm elim
#BSUB -x # exclusive use of node
#BSUB -n 2 # sum of number of tasks
#BSUB -R "span[ptile=1]" # number of processes per node
#BSUB -o mixlsf.out # output filename
#BSUB -e mixlsf.err # error filename
#BSUB -J mixlsf.test # job name
#BSUB -q regular # queue
#
#Build pgfile for mix run
rm -f pgfile
touch pgfile
#
EXE=/mix
#
echo $LSB_HOSTS
j=0
for h in `echo $LSB_HOSTS`
do
echo " "" " >> pgfile
j=`expr $j + 1`
done
#
# Fortran example
mpif90 -Mextend -mp -lmp -o mix mix.f
export OMP_NUM_THREADS=1
mpirun-env.pl -pg pgfile $EXE
export OMP_NUM_THREADS=2
mpirun-env.pl -pg pgfile $EXE
# C example
mpicc -mp -o mix mix.c
export OMP_NUM_THREADS=1
mpirun-env.pl -pg pgfile $EXE
export OMP_NUM_THREADS=2
mpirun-env.pl -pg pgfile $EXE
# C++ example
mpicxx --no_auto_instantiation -mp -o mix mix.cc
export OMP_NUM_THREADS=1
mpirun-env.pl -pg pgfile $EXE
export OMP_NUM_THREADS=2
mpirun-env.pl -pg pgfile $EXE
rm pgfile
program main
! use omp_lib # pgf90 does not support openmp 2.0
! # so this use must be commented out
implicit none
external omp_get_thread_num ! use this kludge for pgi instead of 'use omp_lib'
integer omp_get_thread_num
include 'mpif.h'
integer i
integer rank,error,tag,length,status(MPI_STATUS_SIZE)
integer hz, clock0, clock1, t
real(kind=8):: sum, buf(2), elapsed
character*4 env_vbl
character (len=MPI_MAX_PROCESSOR_NAME) nodename
integer name_len, ierr
call mpi_init(error)
call mpi_comm_rank(MPI_COMM_WORLD,rank,error)
length=2
call getenv('OMP_NUM_THREADS', env_vbl)
print *,'rank = ', rank, ' OMP_NUM_THREADS = ', env_vbl
call system_clock(count_rate = hz)
call system_clock(count = clock0)
if (rank .eq. 1) then
sum=0.0
!$omp parallel
print 12, omp_get_thread_num()
12 format(' OMP thread number ',i4)
!$omp do reduction(+:sum)
do i=1,1000000
sum=sum+exp(.00000001*i)
end do
!$omp enddo
!$omp end parallel
call system_clock(count = clock1)
elapsed = real(clock1 - clock0) / hz
buf(1)=sum
buf(2)=elapsed/1000.0
call mpi_send(buf,length,MPI_REAL8,0,tag,MPI_COMM_WORLD,error)
call MPI_GET_PROCESSOR_NAME(nodename, name_len, ierr)
print *,'xlf MPI task ', rank,' sending data from node ', nodename(1:name_len)
else
call mpi_recv(buf,length,MPI_REAL8,1,tag,MPI_COMM_WORLD,status,error)
call MPI_GET_PROCESSOR_NAME(nodename, name_len, ierr)
print *,'xlf MPI task ', rank,' receiving data on node ', nodename(1:name_len)
print 10, buf(1), buf(2)
10 format(' xlf mix Results: Sum = ',1pe12.6,' Loop time = ',0pf8.6)
end if
call mpi_finalize(error)
stop
end
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <omp.h>
#include <mpi.h>
main(int argc,char **argv)
{
MPI_Status status;
int i, rank,error,tag=42,length=2;
double buf[2], sum=0.0, elapsed, rtc();
int name_len;
char nodename[MPI_MAX_PROCESSOR_NAME];
error = MPI_Init(&argc,&argv);
error=MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if(rank==1)
{
elapsed=rtc();
/* Tell the compiler to divide the following loop into parallel threads: */
#pragma omp parallel
{
#pragma omp critical
printf("OMP thread number %4d \n", omp_get_thread_num());
#pragma omp for reduction(+:sum)
for(i=1; i<1000000; i++)
{
sum += exp( .00000001 * (double)i );
}
}
elapsed=rtc()-elapsed;
buf[0]=sum;
buf[1]=elapsed;
error = MPI_Send(&buf, length, MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
MPI_Get_processor_name(nodename, &name_len);
printf(" c MPI task %4d data sent from node %s\n", rank, nodename);
}
else
{
error=MPI_Recv(&buf,length,MPI_DOUBLE,1,tag,MPI_COMM_WORLD,&status);
MPI_Get_processor_name(nodename, &name_len);
printf(" c MPI task %4d data received on node %s\n", rank, nodename);
printf(" c mix Results: Sum = %11e Loop time = %8f \n",buf[0],buf[1] );
}
error=MPI_Finalize();
exit (0);
}
double rtc()
{
static struct timeval hack;
static long microseconds;
gettimeofday(&hack,NULL);
microseconds=hack.tv_sec*1000000+hack.tv_usec;
return(((double)microseconds)/1000000.);
}
#include <iostream>
#include <string>
#include <math.h>
#include <sys/time.h>
#include <omp.h>
#include <mpi.h>
using namespace std;
//--------------Class Defs-----------------------------------------
class exp_sum {
public:
double elapsed;
double sum();
private:
double summer;
struct timeval time;
double rtc();
};
double exp_sum::sum()
{
int i;
summer=0.0;
elapsed=rtc();
#pragma omp parallel
{
#pragma omp critical
cout << "Thread number " << omp_get_thread_num() << endl;
//
// kludge for pgiCC: had to add ddd as a temp vbl
double ddd;
ddd = 0.0;
#pragma omp for reduction (+: ddd)
for(i=1; i<1000000; i++)
{
ddd += exp( .00000001 * (double)i );
}
summer += ddd;
};
// for AIX
//#pragma omp for reduction (+: summer)
// for(i=1; i<1000000; i++)
// {
// summer += exp( .00000001 * (double)i );
// };
// };
elapsed=exp_sum::rtc()-elapsed;
return summer;
};
double exp_sum:: rtc()
{
gettimeofday(&time,NULL);
return ( (double)(time.tv_sec*1000000+time.tv_usec)/1000000 );
};
//-----Main Program--------------------------------------------------------
int main(int argc,char **argv)
{
exp_sum total;
MPI_Status status;
int rank,error,tag=99,length=2;
double buf[2];
int name_len;
char nodename[MPI_MAX_PROCESSOR_NAME];
error = MPI_Init(&argc,&argv);
error=MPI_Comm_rank(MPI_COMM_WORLD,&rank);
if(rank==1){
buf[0]=total.sum();
buf[1]=total.elapsed;
error = MPI_Send(&buf, length, MPI_DOUBLE,0,tag,MPI_COMM_WORLD);
MPI_Get_processor_name(nodename, &name_len);
cout << "xlcc MPI task " << rank << " data sent from node " << \
nodename << endl;
}
else
{
error=MPI_Recv(&buf,length,MPI_DOUBLE,1,tag,MPI_COMM_WORLD,&status);
MPI_Get_processor_name(nodename, &name_len);
cout << "xlcc MPI task " << rank << " data received on node " << \
nodename << endl;
cout << "xlcc mix Results: Sum = " << buf[0] << " Loop time = " << buf[1] << '\n';
}
error=MPI_Finalize();
return 0;
}
Next page |
IBM Linux cluster systems fundamentals -
Table of contents
If you have questions about this document, please contact
SCD Customer Support.
You can also reach us by telephone 24 hours a day, seven days a week at
303-497-1278.
Additional contact methods:
consult1@ucar.edu
and during
business hours
in NCAR Mesa Lab Suite 39.
© Copyright 2005. University Corporation for Atmospheric
Research (UCAR). All Rights Reserved.
Address of this page:
http://www.scd.ucar.edu/docs/lightning/examples/hybrid.jsp
|