diff --git a/config b/config new file mode 120000 index 0000000..328773a --- /dev/null +++ b/config @@ -0,0 +1 @@ +python/lsst/ctrl/platform/s3df/etc/config \ No newline at end of file diff --git a/python/lsst/ctrl/platform/s3df/etc/templates/allocation.sh.template b/python/lsst/ctrl/platform/s3df/etc/templates/allocation.sh.template index afd69fe..6d6078c 100644 --- a/python/lsst/ctrl/platform/s3df/etc/templates/allocation.sh.template +++ b/python/lsst/ctrl/platform/s3df/etc/templates/allocation.sh.template @@ -12,6 +12,15 @@ else export RUBIN_COLLECTOR_HOST="sdfiana012.sdf.slac.stanford.edu" fi +if [ $NODESET ]; then + echo "NODESET is specified" + echo "NODESET $NODESET" + NODESET_CLAUSE=" && (JobNodeset == \"$NODESET\")" +else + echo "NODESET is empty" + NODESET_CLAUSE="" +fi + export RUBIN_COLLECTOR_PORT=$CPORT echo "Using RUBIN_COLLECTOR_HOST $RUBIN_COLLECTOR_HOST" echo "Using RUBIN_COLLECTOR_PORT $RUBIN_COLLECTOR_PORT" @@ -78,16 +87,18 @@ export _CONDOR_SBIN=${RELEASE_DIR}/sbin export _CONDOR_FILETRANSFER_PLUGINS=${RELEASE_DIR}/libexec/curl_plugin # export _CONDOR_STARTD_RESOURCE_PREFIX=slot_${VERY_RNUM}_ -export _CONDOR_STARTD_RESOURCE_PREFIX=slot_${myuser}_${VERY_RNUM}_ +# Add an informational slot name so that slots of different nodesets do not look the same +N10=`echo $NODESET | cut -c1-10` +export _CONDOR_STARTD_RESOURCE_PREFIX=${N10}slot_${myuser}_${VERY_RNUM}_ SPAN_MEMORY=1000 LOWER_BOUND=$((SLURM_MEM_PER_NODE - SPAN_MEMORY)) UPPER_BOUND=$((SLURM_MEM_PER_NODE + SPAN_MEMORY)) -common_job_name=glide_${myuser} +common_job_name="$NODESETglide_${myuser}" if [ $SLURM_JOB_NAME == ${common_job_name} ]; then - export _CONDOR_START="(Owner == \"${myuser}\")" + export _CONDOR_START="(Owner == \"${myuser}\")${NODESET_CLAUSE}" else - export _CONDOR_START="(Owner == \"${myuser}\") && (RequestMemory>${LOWER_BOUND}) && (RequestMemory<${UPPER_BOUND})" + export _CONDOR_START="(Owner == \"${myuser}\")${NODESET_CLAUSE} && (RequestMemory>${LOWER_BOUND}) && (RequestMemory<${UPPER_BOUND})" fi echo _CONDOR_START echo ${_CONDOR_START} diff --git a/python/lsst/ctrl/platform/s3df/etc/templates/generic.slurm.template b/python/lsst/ctrl/platform/s3df/etc/templates/generic.slurm.template index 551dfda..748ea2a 100644 --- a/python/lsst/ctrl/platform/s3df/etc/templates/generic.slurm.template +++ b/python/lsst/ctrl/platform/s3df/etc/templates/generic.slurm.template @@ -5,7 +5,7 @@ #SBATCH -A $ACCOUNT #SBATCH --ntasks-per-node 1 #SBATCH --cpus-per-task $CPUS -#SBATCH -J glide_$USER_NAME +#SBATCH -J $NODESETglide_$USER_NAME $QOS $RESERVATION $EXCLUSIVE diff --git a/python/lsst/ctrl/platform/s3df/etc/templates/glidein_condor_config.template b/python/lsst/ctrl/platform/s3df/etc/templates/glidein_condor_config.template index fdf7431..5bb740e 100644 --- a/python/lsst/ctrl/platform/s3df/etc/templates/glidein_condor_config.template +++ b/python/lsst/ctrl/platform/s3df/etc/templates/glidein_condor_config.template @@ -42,9 +42,12 @@ $DYNAMIC_SLOTS_BLOCK $PACK_BLOCK + +$NODESET_BLOCK + STARTD_NOCLAIM_SHUTDOWN=$GLIDEIN_SHUTDOWN -STARTD_ATTRS = ALLOCATED_NODE_SET +STARTD_ATTRS = $(STARTD_ATTRS) Nodeset # hold jobs that the startd detects have exceeded assigned memory MEMORY_EXCEEDED = ((MemoryUsage > Memory) =!= TRUE) diff --git a/templates b/templates new file mode 120000 index 0000000..50ab18d --- /dev/null +++ b/templates @@ -0,0 +1 @@ +python/lsst/ctrl/platform/s3df/etc/templates \ No newline at end of file