#! /bin/bash

# Batch-job run script template: changes into the active run directory,
# records node/environment information under SIMFACTORY/, optionally
# rsyncs input files to the run directory, and launches the executable
# through aprun (or through TotalView when the debug flag is non-zero).
#
# Tokens of the form @NAME@ and @( ... )@ are template placeholders;
# presumably they are substituted by the job-submission tooling before
# this script is executed -- they are not shell syntax.
#
# Environment read:
#   PBS_NODEFILE      file listing the nodes assigned to the job
#   RSYNC_TO_SCRATCH  optional; if non-empty it is rsync'ed to the run dir

echo "Preparing:"
set -x                          # Output commands
set -e                          # Abort on errors

cd "@RUNDIR@-active"

echo "Checking:"
pwd
hostname
date
# Quoted so that an empty/unset value makes cat fail (and set -e abort)
# instead of silently reading from stdin.
cat "${PBS_NODEFILE}" > SIMFACTORY/NODES

echo "Environment:"
export GMON_OUT_PREFIX=gmon.out
export OMP_NUM_THREADS=@NUM_THREADS@
# For jobs with >128 MPI processes one may need to increase the
# unexpected receive queue buffer size
#export MPICH_UNEX_BUFFER_SIZE=40M
export MPICH_UNEX_BUFFER_SIZE=100M
# Setting this seems to make MPI hang
#export MPICH_MAX_SHORT_MSG_SIZE=16000
export MPICH_PTL_UNEX_EVENTS=204800
# NAMD uses:
#export MPICH_PTL_SEND_CREDITS=-1
#export MPICH_MAX_SHORT_MSG_SIZE=8000
#export MPICH_PTL_UNEX_EVENTS=80000
#export MPICH_UNEX_BUFFER_SIZE=100M
export MPICH_ENV_DISPLAY=1
export MPICH_VERSION_DISPLAY=1
export MPICH_ABORT_ON_ERROR=1

# NOTE: Kraken has 2 sockets per node with 6 cores each
# aprun options:
#    These are necessary:
#       -n    Total number of MPI processes
#       -d    Specifies number of cores per MPI process (1-12)
#    These are implied:
#       -N    Number of MPI processes per node (1-12)
#       -S    Number of MPI processes per socket (1-6)
#    Memory options:
#       -m    Amount of memory per MPI process (with suffix k, M, or G)
#       -ss   Allocate memory only on same socket

# Specify the number of MPI processes per node explicitly. Also
# specify the number of MPI processes per socket if this makes sense.
# (SOCKET_PROCS is the empty string when the per-node process count does
# not divide evenly across the two sockets.)
NODE_PROCS='-N @(@PPN_USED@ / @NUM_THREADS@)@'
SOCKET_PROCS='@(@PPN_USED@ % (2 * @NUM_THREADS@) == 0 ? "-S @(@PPN_USED@ / (2 * @NUM_THREADS@))@" : "")@'

# NODE_PROCS and SOCKET_PROCS are plain (unexported) shell variables, so
# they do not appear in this dump.
env | sort > SIMFACTORY/ENVIRONMENT

# rsync something (e.g. initial data files) to scratch for simulation
# to use before it starts
if [[ -n "${RSYNC_TO_SCRATCH}" ]]; then
    echo "Copying files in ${RSYNC_TO_SCRATCH} to @RUNDIR@"
    rsync -Pav "${RSYNC_TO_SCRATCH}" "@RUNDIR@"
fi

echo "Starting:"
# Assign first, export second, so that a failing `date` is not masked by
# the exit status of `export`.
CACTUS_STARTTIME=$(date +%s)
export CACTUS_STARTTIME

#aprun -n @NUM_PROCS@ -N @(@PPN_USED@/@NUM_THREADS@)@ -d @NUM_THREADS@ ./@EXECUTABLE@ -L 3 @PARFILE@
if [ @RUNDEBUG@ -eq 0 ]; then
    # NODE_PROCS / SOCKET_PROCS are intentionally left unquoted: each holds
    # an option and its value ("-N <n>") that must split into two words,
    # and an empty SOCKET_PROCS must vanish entirely.
    # shellcheck disable=SC2086
    aprun -n @NUM_PROCS@ -d @NUM_THREADS@ ${NODE_PROCS} ${SOCKET_PROCS} @EXECUTABLE@ -L 3 @PARFILE@
else
    module load totalview
    # NOTE(review): the debug launch does not pass NODE_PROCS/SOCKET_PROCS,
    # so process placement may differ from the normal run -- confirm this
    # is intended.
    totalview aprun -a -n @NUM_PROCS@ -d @NUM_THREADS@ @EXECUTABLE@ -L 3 @PARFILE@
fi

echo "Stopping:"
date

echo "Done."