Example ACES queue MPI jobs
PBS parallel jobs use either the TCP-based MPI communications (over Gigabit or Fast Ethernet depending on the node) or a faster network (e.g. Myrinet) if available.
All of the shell scripts below assume a C-shell family shell (csh/tcsh). If a Bourne family shell (sh/bash/ksh/zsh) is used instead, the interpreter line at the top of the script (e.g. "#!/bin/csh") must be changed accordingly, and every "setenv FOO BAR" statement must become "FOO=BAR; export FOO".
Using MPICH and an executable that was compiled with the corresponding (Intel/Portland/GNU) MPICH compiler drivers. The recommended way is to use mpiexec to spawn jobs.
#!/bin/csh
# invoking mpiexec on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH.stderr
#PBS -o myrunMPICH.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH and mpiexec modules, derive the process and node
# counts from the PBS node file, launch "executable" with mpiexec (-comm p4)
# and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH module: enable exactly one line, matching the compiler family
# (Intel, Portland Group or GNU) the executable was built with.
#
#module add mpich/1.2.6/intel
#module add mpich/1.2.6/pgi
#module add mpich/1.2.6/gnu
#module add mpich/1.2.7p1/intel
#module add mpich/1.2.7p1/pgi
module add mpich/1.2.7p1/gnu
# mpiexec comes from its own module and is required for the launches below.
module add mpiexec
# PBS_DEFAULT is set to the second "."-separated field of the node file path
# (presumably the PBS server name -- confirm for your site).
setenv PBS_DEFAULT `echo ${PBS_NODEFILE} | awk -F. '{ print $2 }'`
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: both mpiexec lines below are active, so the program runs twice;
# comment out the form you do not want.
#
# Form 1: explicit process count
mpiexec -comm p4 -n ${NP} ./executable
# Form 2: no -n, use all allocated processors (NP is then not needed)
mpiexec -comm p4 ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH and an executable that was compiled with the corresponding (Intel/Portland/GNU) MPICH compiler drivers. This is NOT the recommended way to launch MPICH jobs on the clusters!
#!/bin/csh
# invoking mpirun on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH.stderr
#PBS -o myrunMPICH.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH module, derive the process and node counts from
# the PBS node file, launch "executable" with mpirun using that node file
# as the machinefile, and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH module: enable exactly one line, matching the compiler family
# (Intel, Portland Group or GNU) the executable was built with.
#
#module add mpich/1.2.6/intel
#module add mpich/1.2.6/pgi
#module add mpich/1.2.6/gnu
#module add mpich/1.2.7p1/intel
#module add mpich/1.2.7p1/pgi
module add mpich/1.2.7p1/gnu
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# mpirun is given the full PBS node file and the process count NP.
#
mpirun -machinefile ${PBS_NODEFILE} -np ${NP} ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH-GM and an executable that was compiled with the corresponding (Intel/Portland/GNU) MPICH compiler drivers. The recommended way is to use mpiexec to spawn jobs.
#!/bin/csh
# invoking mpiexec on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH-GM
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1:myrinet,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH-GM.stderr
#PBS -o myrunMPICH-GM.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH-GM and mpiexec modules, derive the process and
# node counts from the PBS node file, launch "executable" with mpiexec
# (-comm gm) and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH-GM (Myrinet communications) module: enable exactly one line,
# matching the compiler family (Intel, Portland Group or GNU) the
# executable was built with.
#
#module add mpich-gm/intel
#module add mpich-gm/pgi
module add mpich-gm/gnu
# mpiexec comes from its own module and is required for the launches below.
module add mpiexec
# PBS_DEFAULT is set to the second "."-separated field of the node file path
# (presumably the PBS server name -- confirm for your site).
setenv PBS_DEFAULT `echo ${PBS_NODEFILE} | awk -F. '{ print $2 }'`
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: both mpiexec lines below are active, so the program runs twice;
# comment out the form you do not want.
#
# Form 1: explicit process count
mpiexec -comm gm -n ${NP} ./executable
# Form 2: no -n, use all allocated processors (NP is then not needed)
mpiexec -comm gm ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH-GM and an executable that was compiled with the corresponding (Intel/Portland/GNU) MPICH compiler drivers. This is NOT the recommended way to launch MPICH-GM jobs on the clusters!
#!/bin/csh
# invoking mpirun on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH-GM
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1:myrinet,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH-GM.stderr
#PBS -o myrunMPICH-GM.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH-GM module, derive the process and node counts
# from the PBS node file, launch "executable" with mpirun using that node
# file as the machinefile, and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH-GM (Myrinet communications) module: enable exactly one line,
# matching the compiler family (Intel, Portland Group or GNU) the
# executable was built with.
#
#module add mpich-gm/intel
#module add mpich-gm/pgi
module add mpich-gm/gnu
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# mpirun is given the full PBS node file and the process count NP.
#
mpirun -machinefile ${PBS_NODEFILE} -np ${NP} ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using LAM and an executable that was compiled with the corresponding compiler (Intel/Portland/GNU) drivers. One may use mpirun or the MPI-2 standard based mpiexec included in LAM to spawn jobs.
#!/bin/csh
# invoking mpirun/mpiexec on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameLAM
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunLAM.stderr
#PBS -o myrunLAM.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the LAM module, derive the process and node counts from the
# PBS node file, boot LAM, run "executable" once per launcher/transport
# combination listed below, halt LAM and remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# LAM module: enable exactly one line, matching the compiler family
# (Intel, Portland Group or GNU) the executable was built with.
#
#module add lam/intel
#module add lam/pgi
module add lam/gnu
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: every uncommented mpirun/mpiexec line below is executed in turn, so
# the program runs once per line; keep only the variant(s) you need.
#
# Boot the LAM run-time environment (-v: verbose)
lamboot -v
# Default: let LAM pick the best transport available
mpirun -np ${NP} ./executable
# same, via mpiexec
mpiexec -n ${NP} ./executable
# lamd transport (lowest performance, full tracing)
mpirun -ssi rpi lamd -O -np ${NP} ./executable
# same, via mpiexec
mpiexec -ssi rpi lamd -n ${NP} ./executable
# sysv transport (shared memory, oversubscribed; should work but currently
# does not, hence left disabled)
#mpirun -ssi rpi sysv -O -np $NP ./executable
# same, via mpiexec
#mpiexec -ssi rpi sysv -n $NP ./executable
# usysv transport (high-performance shared memory; at most 2 processes on
# 2-processor nodes)
mpirun -ssi rpi usysv -O -np ${NP} ./executable
# same, via mpiexec
mpiexec -ssi rpi usysv -n ${NP} ./executable
# gm transport (Myrinet; only if Myrinet nodes were requested)
mpirun -ssi rpi gm -O -np ${NP} ./executable
# same, via mpiexec
mpiexec -ssi rpi gm -n ${NP} ./executable
# tcp transport (Gigabit/Fast Ethernet)
mpirun -ssi rpi tcp -O -np ${NP} ./executable
# same, via mpiexec
mpiexec -ssi rpi tcp -n ${NP} ./executable
# crtcp transport (Gigabit/Fast Ethernet with Checkpoint-Restart)
mpirun -ssi rpi crtcp -O -np ${NP} ./executable
# same, via mpiexec
mpiexec -ssi rpi crtcp -n ${NP} ./executable
# Shut the LAM run-time environment down again
lamhalt
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using Open MPI and an executable that was compiled with the generic compiler drivers.
#!/bin/csh
# invoking mpirun/mpiexec on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameOpenMPI
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunOpenMPI.stderr
#PBS -o myrunOpenMPI.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the Open MPI module, derive the process and node counts
# from the PBS node file, run "executable" with mpirun and with mpiexec,
# and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# Open MPI module: one module serves the Intel, Portland Group and GNU
# compilers alike; enable exactly one version.
#
#module add openmpi/1.0
module add openmpi/1.0.1
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: both launch lines below are active, so the program runs twice;
# comment out the form you do not want.
#
# Default: let Open MPI pick the best transport available
mpirun -np ${NP} ./executable
# same, via mpiexec
mpiexec -n ${NP} ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH-VMI and an executable that was compiled with the corresponding compiler (Intel/GNU) drivers. (No Portland Group Compiler driver could be prepared.)
#!/bin/csh
# invoking mpirun on ACES Linux clusters
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICHVMI
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH-VMI.stderr
#PBS -o myrunMPICH-VMI.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH-VMI module, derive the process and node counts
# from the PBS node file, start vmieyes on the allocated nodes, run
# "executable" once per transport listed below, then remove the scratch
# machinefile, kill vmieyes and delete its database files.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH-VMI module: enable exactly one line, matching the compiler family
# (Intel or GNU) the executable was built with.
#
#module add mpich-vmi/1.2.5/intel
#module add mpich-vmi/1.2.5/gnu
#module add mpich-vmi/1.2.6/intel
module add mpich-vmi/1.2.6/gnu
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; used below to
# start vmieyes, and useful for hybrid (MPI/OpenMP) codes and rsh scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: every mpirun line below is executed in turn, so the program runs
# once per transport; keep only the variant(s) you need.
#
# Locate the vmieyes binary and start NPU copies of it, using the collapsed
# machinefile
setenv VMIEYESBIN `which vmieyes`
mpirun -np ${NPU} -machinefile machinefile.uniq.${PBS_JOBID} ${VMIEYESBIN} --reaper=localhost
# Default (uses the TCP transport)
mpirun -np ${NP} ./executable
# Myrinet (only if Myrinet nodes were requested)
mpirun -specfile myrinet -np ${NP} ./executable
# Gigabit/Fast Ethernet
mpirun -specfile tcp -np ${NP} ./executable
# Gigabit/Fast Ethernet combined with Myrinet (best of both)
mpirun -specfile xsite-myrinet-tcp -np ${NP} ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
# Stop the vmieyes processes through pbsdsh
pbsdsh killall vmieyes
# Remove the temporary vmieyes database files left in the working directory
rm vmieyes-*.db
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH2 and an executable that was compiled with the corresponding compiler (Intel/GNU) drivers. (No Portland Group Compiler driver could be prepared.) The recommended way is to use mpiexec to spawn jobs.
#!/bin/csh
# invoking mpiexec on ACES Linux cluster
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Example parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH2
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH2.stderr
#PBS -o myrunMPICH2.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH2 and mpiexec modules, derive the process and node
# counts from the PBS node file, launch "executable" with mpiexec
# (-comm pmi) and finally remove the scratch machinefile.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH2 module: enable exactly one line, matching the compiler family
# (Intel or GNU) the executable was built with.
#
#module add mpich2/1.0/intel
#module add mpich2/1.0/gnu
#module add mpich2/1.0.1/intel
#module add mpich2/1.0.1/gnu
#module add mpich2/1.0.3/intel
module add mpich2/1.0.3/gnu
# mpiexec comes from its own module and is required for the launches below.
module add mpiexec
# PBS_DEFAULT is set to the second "."-separated field of the node file path
# (presumably the PBS server name -- confirm for your site).
setenv PBS_DEFAULT `echo ${PBS_NODEFILE} | awk -F. '{ print $2 }'`
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; intended for
# hybrid (MPI/OpenMP) codes and for rsh-based helper scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
# (not used further in this script)
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: both mpiexec lines below are active, so the program runs twice;
# comment out the form you do not want.
#
# Form 1: explicit process count
mpiexec -comm pmi -n ${NP} ./executable
# Form 2: no -n, use all allocated processors (NP is then not needed)
mpiexec -comm pmi ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
#
# Explicit exit (the script would end here anyway)
#
exit
Using MPICH2 and an executable that was compiled with the corresponding compiler (Intel/GNU) drivers. (No Portland Group Compiler driver could be prepared.) One may use mpirun or the MPI-2 standard based mpiexec included in MPICH2 to spawn jobs. This is NOT the recommended way to launch MPICH2 jobs on the clusters!
#!/bin/csh
# invoking mpirun/mpiexec on ACES Linux cluster
#
# All PBS options start as "#PBS " and can be specified on the command line
# after qsub instead of being embedded in the script file.
#----------------------------------------------
# o Queue name
# -q queue
# Example parallel queues available on itrda are:
# four (2hours,16nodes),four-twelve (12hours,26nodes),long (168hours,64nodes)
#PBS -q four
#----------------------------------------------
# o Job name instead of the PBS script filename
# -N Job name (use a distinguishing name)
#PBS -N MyNameMPICH2
#----------------------------------------------
# o Resource lists
# -l resource lists, separated by a ","
# To ask for N nodes use "nodes=N"
# To ask for 2 processors per node use ":ppn=2", otherwise ":ppn=1"
# after the nodes=N. Preferably use ppn=2 and ask for fewer nodes.
# To ask for Myrinet use ":myrinet", for Gigabit Ethernet use ":gigabit"
# after the nodes=N:ppn=M
# To specify total wallclock time use "walltime=hh:mm:ss"
#PBS -l nodes=16:ppn=1,walltime=00:10:00
#----------------------------------------------
# o stderr/out combination
# -j {eo|oe}
# Causes the standard error and standard output to be combined in one file.
# For standard output to be added to standard error use "eo"
# For standard error to be added to standard output use "oe"
#
# o stderr/out (specify them instead of getting script.[oe]$PBS_JOBID)
# -e standard error file
# -o standard output file
# You can append ${PBS_JOBID} to ensure distinct filenames
#PBS -e myrunMPICH2.stderr
#PBS -o myrunMPICH2.stdout
#----------------------------------------------
# o Starting time
# -a time
# Declares the time after which the job is eligible for execution.
#----------------------------------------------
# o User notification
# -m {a|b|e}
# Send mail to the user when:
# job aborts: "a", job begins running: "b", job ends: "e"
#PBS -m ae
#----------------------------------------------
# o Exporting of environment
# -V export all my environment var's
#PBS -V
#----------------------------------------------
# Begin execution
#
# Job body: load the MPICH2 module, derive the process and node counts from
# the PBS node file, boot the mpd daemons on the allocated nodes, run
# "executable" with mpirun and with mpiexec, then remove the scratch
# machinefile and shut the mpd daemons down.
#
# Uncomment the next line to dump the environment when debugging:
#printenv
#
# MPICH2 module: enable exactly one line, matching the compiler family
# (Intel or GNU) the executable was built with.
#
#module add mpich2/1.0/intel
#module add mpich2/1.0/gnu
#module add mpich2/1.0.1/intel
#module add mpich2/1.0.1/gnu
#module add mpich2/1.0.3/intel
module add mpich2/1.0.3/gnu
#
# Record which node file and which nodes this job was given
#
echo ${PBS_NODEFILE}
cat ${PBS_NODEFILE}
#----------------------------------------------
# Work from the directory the job was submitted from
#
cd ${PBS_O_WORKDIR}
# NP: number of processes = number of lines in the PBS node file
setenv NP `wc -l < ${PBS_NODEFILE}`
# Node file with adjacent duplicate host lines collapsed; used below to
# boot mpd, and useful for hybrid (MPI/OpenMP) codes and rsh scripts
uniq ${PBS_NODEFILE} > machinefile.uniq.${PBS_JOBID}
# NPU: number of lines in that collapsed file, i.e. the node count
setenv NPU `wc -l < machinefile.uniq.${PBS_JOBID}`
#
# Launch the MPI program "executable", which must be in PBS_O_WORKDIR.
# NOTE: both launch lines below are active, so the program runs twice;
# comment out the form you do not want.
#
# Boot NPU mpd daemons on the hosts of the collapsed machinefile, via rsh
mpdboot --totalnum=${NPU} --file=${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID} --rsh=rsh
# Default (uses the TCP transport)
mpirun -np ${NP} ./executable
# same, via mpiexec
mpiexec -n ${NP} ./executable
# Cleanup
# Delete the collapsed machinefile created above
rm ${PBS_O_WORKDIR}/machinefile.uniq.${PBS_JOBID}
# Shut down all mpd daemons
mpdallexit
#
# Explicit exit (the script would end here anyway)
#
exit
|