#! /bin/sh
#
# SLURM batch job: run the FMS executable ./fms_MOM_LAD_AEOLUS.x in the
# current working directory and archive what it produced.
#
# Steps, in order:
#   1. Lock the working directory with the symlink
#      .workdir_locked_by_batchjob, whose target is the job id.
#   2. Print the SLURM/MPI related environment and the resource limits,
#      and request unlimited stack, data segment and CPU time.
#   3. Remove old *.nc output, then run the model (through srun, or
#      directly - see the branch below) and remember its exit status.
#   4. If that status is 0: move *.nc files to history/ and store the logs
#      and the RESTART directory as bzip2 tarballs in history/.
#   5. Print sacct accounting for the job, remove the lock and exit with the
#      model's exit status.
#
# Lines starting with "#SBATCH" are the active sbatch directives; lines
# starting with "##SBATCH" are disabled alternatives kept for reference.

##SBATCH --account=next
#SBATCH --job-name=mom4_Aeolus_lad-init-21-mar-clim
#
# without any nodes or tasks specification, will allocate 1 task, just what we need for now
##SBATCH --nodes 1
#SBATCH --tasks 1
##SBATCH --tasks-per-node=1
##SBATCH --cpus-per-task=1
##SBATCH --exclusive
#
# Acceptable time formats include "minutes", "minutes:seconds", "hours:minutes:seconds",
# "days-hours", "days-hours:minutes" and "days-hours:minutes:seconds"
##SBATCH --qos=short
##SBATCH --time=300
##SBATCH --time=2:00:00
##SBATCH --time=23:00:00
#
#SBATCH --qos=medium
#SBATCH --time=160:00:00
##SBATCH --time=50:00:00
#
##SBATCH --qos=long
##SBATCH --time=200:00:00
##SBATCH --time=50:00:00
## huge: up to 4096 CPUs = 256 nodes, but only up to 1h, with low scheduling prio
##SBATCH --qos=huge
##SBATCH --time=60
#SBATCH --output=fms.%j.out
##SBATCH --error=fms.1.%j.err
#SBATCH --mail-type=ALL,TIME_LIMIT_90,TIME_LIMIT

export LANG=C

set -x

# ---------------------------------------------------------------------------
# Working directory lock
# ---------------------------------------------------------------------------
lockfile=.workdir_locked_by_batchjob
# Model stdout/stderr go here; the name encodes node count, task count, job id.
run_log="fms.out-${SLURM_NNODES}-${SLURM_NTASKS}-${SLURM_JOBID}"

# The symlink target records the owning job id. Creating a symlink fails if
# the name already exists, which is what makes it usable as a lock.
# Without a job id a placeholder target is used; it never equals
# $SLURM_JOBID below, so an existing lock is then always treated as foreign.
if ! ln -s "${SLURM_JOBID:-not-a-slurm-job}" "$lockfile"; then
  # sigh. it might be a restart attempt initiated by the batch system after a
  # vacate order. In that case the job ID is still the same that created the
  # existing lock file. And if so, we just start anew silently.
  oldjobid=$(readlink "$lockfile")
  if [ "$oldjobid" != "$SLURM_JOBID" ]; then
    echo "Error: Directory $(pwd) is locked by another batch job:"
    ls -l "$lockfile"
    exit 1
  else
    echo own lockfile already there - looks like a slurm restart attempt >> "fms.${SLURM_JOBID}.restart-log"
    echo "$(date) $(hostname)" >> "fms.${SLURM_JOBID}.restart-log"
  fi
fi

date

# ---------------------------------------------------------------------------
# Environment and resource limits (tracing off to keep the listing readable)
# ---------------------------------------------------------------------------
set +x
env | grep -E 'SBATCH|SLURM|MPI|OMP|KMP' | sort
echo Hard Limits
ulimit -aH
echo
#ulimit -c unlimited
ulimit -s unlimited
ulimit -d unlimited
#ulimit -m 3670016
#ulimit -v unlimited
#ulimit -f unlimited
ulimit -t unlimited
echo Soft Limits
ulimit -a
set -x

mkdir -p RESTART history

#llgetmachinelist | sed "s/blade/bi/" > cpulist.$LOADL_STEP_ID
#sort -u cpulist.$LOADL_STEP_ID > mpdhosts.$LOADL_STEP_ID
#machine_count=`cat mpdhosts.$LOADL_STEP_ID | wc -l`
#cpu_count=`cat cpulist.$LOADL_STEP_ID | wc -l`

# Remove netCDF output (combined and per-task pieces) left by a previous run.
rm -f *.nc *.nc.[0-9][0-9][0-9][0-9]*

# ---------------------------------------------------------------------------
# Run the model
# ---------------------------------------------------------------------------
# concatenate the values of both variables, one might be the empty string.
# If both are empty the value defaults to 0 and the serial branch is taken.
# NOTE(review): with both variables set to 1 the concatenation is "11", so a
# 1-task/1-node job still takes the srun branch - confirm this is intended.
task_node_digits="${SLURM_NTASKS}${SLURM_NNODES}"
if [ "${task_node_digits:-0}" -gt 1 ]; then
  export MPD_CON_EXT="Slurm_Job_${SLURM_JOBID}"
  #export I_MPI_PMI_LIBRARY=/p/system/slurm/lib/libpmi.so

  #On 09/04/15 14:20, Ciaron Linstead wrote:
  #>
  #> To use the (Intel-recommended) DAPL-UD, the following variables need to be set:
  #>
  #export I_MPI_FABRICS=shm:dapl
  #export I_MPI_DAPL_UD=enable
  #export I_MPI_DAPL_UD_PROVIDER=ofa-v2-mlx5_0-1u
  #>
  #> It seems that these have an effect on larger jobs (e.g. LPJ 256-task), and
  #> allow I_MPI_DYNAMIC_CONNECTION to be enabled.

  #On 10/08/15 10:14, Ciaron wrote:
  #>
  #> Ticket Number: 0109499
  #>
  #> I'm not sure about srun parameters, but I think we're going to recommend to
  #> users to use "mpirun -bootstrap slurm"
  #>
  #> You could try that with I_MPI_DEBUG=5 to check the fabric.

  # export MV2_SMP_USE_CMA=0
  # unset NC_BLKSZ
  # unset NC_BLOCKSIZE
  # export SCALASCA_DIR=/home/petri
  # #export EPK_VERBOSE=1
  # #export EPK_TRACE=1
  # #export ELG_BUFFER_SIZE=XXX
  # export ESD_BUFFER_SIZE=1000000
  # mpdboot -n $machine_count -r ssh -f mpdhosts.$LOADL_STEP_ID
  # -genv F_UFMTENDIAN big -genv NC_BLKSZ 8192 -genv NC_BLOCKSIZE 8192
  # time scalasca -analyze -t -f EPIK.FILTER mpiexec -machinefile cpulist.$LOADL_STEP_ID -n 31 ./fms_MOM_LAD_AEOLUS.x > fms.out-$LOADL_STEP_ID 2>&1
  # time mpiexec -machinefile cpulist.$LOADL_STEP_ID -n 31 ./fms_MOM_LAD_AEOLUS.x > fms.out-$LOADL_STEP_ID 2>&1
  #export I_MPI_DEBUG=5
  time srun --propagate=ALL ./fms_MOM_LAD_AEOLUS.x > "$run_log" 2>&1
  #time mpirun -v -bootstrap slurm ./fms_MOM_LAD_AEOLUS.x > fms.out-$SLURM_NNODES-$SLURM_NTASKS-$SLURM_JOBID 2>&1
  success=$?
  # NOTE(review): presumably a leftover from MPD-based launches - confirm it
  # is still needed with srun.
  mpdallexit

  # Each numbered piece NAME.nc.NNNN belongs to the combined file NAME.nc.
  # Derive the list of distinct NAME.nc values and run mppnccombine once per
  # name, passing all of its pieces. The file names are assumed to contain no
  # whitespace, since the list is split on whitespace.
  bases=$(ls -1 *.nc.[0-9][0-9][0-9][0-9]* \
    | sed -e 's/\.nc\.[0-9][0-9][0-9][0-9][0-9]*/.nc/' \
    | sort -u)
  for b in $bases; do
    echo mppnccombine "$b"
    /iplex/01/climber3/petri/mom5.0.2/bin/mppnccombine.pik-iplex-ifort11 -r "$b" "$b".[0-9][0-9][0-9][0-9]*
  done
else
  #time srun --propagate ./fms_MOM_LAD_AEOLUS.x > fms.out-$SLURM_NNODES-$SLURM_NTASKS-$SLURM_JOBID 2>&1
  # Serial run: the executable is started directly, with these SLURM
  # variables removed from its environment.
  unset SLURM_PMI_KVS_DUP_KEYS
  unset SLURM_JOB_ID SLURM_STEPID SLURM_NPROCS SLURM_PROCID SLURM_GTIDS
  time ./fms_MOM_LAD_AEOLUS.x > "$run_log" 2>&1
  success=$?
fi

#rm -f cpulist.$LOADL_STEP_ID
#rm -f mpdhosts.$LOADL_STEP_ID

# ---------------------------------------------------------------------------
# Archive the results
# ---------------------------------------------------------------------------
echo FMS exited with "$success"
if [ 0 = "$success" ]; then
  echo FMS run endded successfully

  # Time stamps come from time_stamp.csh; if it prints nothing, fall back to
  # a "tmp" + wall-clock stamp so that archive names are never empty.
  # begindate is not used further down; the call is kept because its side
  # effects (if any) are not visible from this script.
  begindate=$(../../../bin/time_stamp.csh -bf digital)
  if [ -z "$begindate" ]; then begindate=tmp$(date +%Y%j%H%M%S); fi
  enddate=$(../../../bin/time_stamp.csh -ef digital)
  if [ -z "$enddate" ]; then enddate=tmp$(date +%Y%j%H%M%S); fi
  rm -f time_stamp.out

  for i in *.nc; do
    # Without any match the pattern stays literal; skip it.
    [ -e "$i" ] || continue
    mv "$i" "history/$enddate.$i"
  done

  tar cvjf "history/$enddate.out.tar.bz2" "$run_log" *.out

  # Keep the run configuration together with the restart files. RESTART is
  # renamed only while tar runs, so that the paths stored in the archive
  # start with the time stamp.
  cp -p input.nml data_table diag_table field_table RESTART/.
  mv RESTART "$enddate.RESTART"
  tar cvjf "history/$enddate.RESTART.tar.bz2" "$enddate.RESTART/."
  mv "$enddate.RESTART" RESTART
else
  echo FMS run failed with code "$success"
fi

sacct -p -j "$SLURM_JOBID" --format=JobID,JobName,MinCPU,MinCPUNode,MinCPUTask,AveCPU,NTasks,AllocCPUS,Elapsed,ExitCode,AveCPUFreq,ReqCPUFreq,ConsumedEnergy

# Release the working directory for the next job.
rm -f "$lockfile"
exit "$success"