70 changes: 70 additions & 0 deletions examples/scripts/score_O2_job_array.sh
@@ -0,0 +1,70 @@
#!/bin/bash
# This script can be used to score an ensemble of models.
# To score a single model, set a constant seed (e.g. seeds=(42)) and use only array ids between 0 and 99, as sketched below.
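# A minimal sketch of that single-model setup (values are illustrative):
#   seeds=(42)
#   #SBATCH --array=0-86%10 # one task per DMS, all ids below 100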
#SBATCH -c 2 # Request two cores
#SBATCH -N 1 # Request one node (if you request more than one core with -c, also using
# -N 1 means all cores will be on the same node)
#SBATCH -t 0-23:59 # Runtime in D-HH:MM format
#SBATCH -p gpu_quad,gpu_marks,gpu #,gpu_requeue # Partition to run in
# If on gpu_quad, use teslaV100s
# If on gpu_requeue, use teslaM40 or a100?
# If on gpu, any of them are fine (teslaV100, teslaM40, teslaK80) although K80 sometimes is too slow
#SBATCH --gres=gpu:1
#SBATCH --constraint=gpu_doublep # Only use double precision GPUs, otherwise our theano version can't use them
#SBATCH --qos=gpuquad_qos
#SBATCH --mem=20G # Total memory for all cores

# To use email notifications, set both of the following options
##SBATCH --mail-type=TIME_LIMIT_80,TIME_LIMIT,FAIL,ARRAY_TASKS
##SBATCH --mail-user="<email>"

#SBATCH -vv # Verbose

##SBATCH -o slurm-%j.out # File to which STDOUT + STDERR will be written, including job ID in filename
#SBATCH --job-name="score_deepseq"
# Job array-specific
#SBATCH --output=slurm_files/slurm-lvn-%A_%3a-%x.out # Nice tip: using %3a to pad to 3 characters (23 -> 023)
##SBATCH --array=0-86,100-186,200-286,300-386,400-486%10 # 87 DMSs in total benchmark
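# e.g. array task 23 of (hypothetical) job 4242 would log to
#   slurm_files/slurm-lvn-4242_023-score_deepseq.out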

################################################################################

set -e # exit on the first command failure (from Joost's slurm_for_ml)

# Note: Remember to clear the ~/.theano cache before running this script; otherwise jobs eventually start crashing when they try to compile Theano simultaneously
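# A minimal way to do that, assuming Theano's default cache location:
#   rm -rf ~/.theano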

echo "hostname: $(hostname)"
echo "Running from: $(pwd)"
echo "GPU available: $(nvidia-smi)"
module load gcc/6.2.0 cuda/9.0
export THEANO_FLAGS='floatX=float32,device=cuda,force_device=True,traceback.limit=20,exception_verbosity=high' # Otherwise Theano only raises a warning and falls back to the CPU

DATASET_ID=$(($SLURM_ARRAY_TASK_ID % 100)) # Group all datasets together in 0xx, 1xx, 2xx, etc.
SEED_ID=$(($SLURM_ARRAY_TASK_ID / 100))
seeds=(1 2 3 4 5) # Theano won't accept seed 0 for some reason
SEED=${seeds[$SEED_ID]}
echo "DATASET_ID: $DATASET_ID, SEED: $SEED"

# Fill these in before running:
export dms_mapping=""          # CSV containing MSA and DMS mappings
export dms_input_folder=""     # directory containing DMS input files
# Remember to create this folder before the run:
export dms_output_folder=""    # directory to store output CSVs
export msa_path=""             # folder containing MSA files
export model_checkpoint_dir="" # folder containing model checkpoints
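# Hypothetical example values, for illustration only:
#   export dms_mapping=/path/to/dms_mapping.csv
#   export dms_input_folder=/path/to/dms_inputs
#   export dms_output_folder=/path/to/dms_outputs
#   export msa_path=/path/to/msas
#   export model_checkpoint_dir=/path/to/model_checkpoints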

# Monitor GPU usage (store outputs in ./gpu_logs/)
#/home/lov701/job_gpu_monitor.sh --interval 1m gpu_logs &

srun stdbuf -oL -eL /n/groups/marks/users/aaron/deep_seqs/deep_seqs_env/bin/python \
    /n/groups/marks/users/lood/DeepSequence_runs/run_muteff_pred_seqs_batch.py \
    --dms_mapping "$dms_mapping" \
    --dms_input_dir "$dms_input_folder" \
    --dms_output_dir "$dms_output_folder" \
    --msa_path "$msa_path" \
    --model_checkpoint "$model_checkpoint_dir" \
    --dms_index "$DATASET_ID" \
    --samples 2000 \
    --batch_size 8000 \
    --seed "$SEED"
# --theta-override 0.9


69 changes: 69 additions & 0 deletions examples/scripts/train_O2_job_array_seeds.sh
@@ -0,0 +1,69 @@
#!/bin/bash
#SBATCH -c 2 # Request two cores
#SBATCH -N 1 # Request one node (if you request more than one core with -c, also using
# -N 1 means all cores will be on the same node)
#SBATCH -t 0-23:59 # Runtime in D-HH:MM format
#SBATCH -p gpu_quad #,gpu_marks,gpu,gpu_requeue # Partition to run in
# If on gpu_quad, use teslaV100s
# If on gpu_requeue, use teslaM40 or a100?
# If on gpu, any of them are fine (teslaV100, teslaM40, teslaK80) although K80 sometimes is too slow
#SBATCH --gres=gpu:1
#SBATCH --constraint=gpu_doublep # Only use double precision GPUs, otherwise our theano version can't use them
#SBATCH --qos=gpuquad_qos
#SBATCH --mem=20G # Total memory for all cores

# To use email notifications, set both of the following options
##SBATCH --mail-type=TIME_LIMIT_80,TIME_LIMIT,FAIL,ARRAY_TASKS
##SBATCH --mail-user="<email>"

##SBATCH -o slurm_files/slurm-%j.out # File to which STDOUT + STDERR will be written, including job ID in filename
#SBATCH --job-name="deepseq_training"

# Job array-specific
#SBATCH --output=slurm_files/slurm-lvn-%A_%3a-%x.out # Nice tip: using %3a to pad to 3 characters (23 -> 023)
##SBATCH --error=slurm_files/slurm-lvn-%A_%3a-%x.err # Optional: Redirect STDERR to its own file
#SBATCH --array=0-49,100-149,200-249,300-349,400-449%10 # 5 seeds x 50 MSAs, with at most 10 simultaneous jobs
#SBATCH --hold # Holds job so that we can first manually check a few

# Quite a neat workflow:
# Submit the job array in the held state, then release the first job to test.
# Add a dependency so that the remaining jobs run as soon as the first job completes successfully:
#   scontrol update Dependency=afterok:<jobid>_0 JobId=<jobid>
# Then release all the other jobs; they stay pending until the first job is done.
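#
# A sketch of that sequence (12345 is a hypothetical job id):
#   sbatch train_O2_job_array_seeds.sh          # submitted held because of --hold below
#   scontrol release 12345_0                    # release the first array task as a test
#   scontrol update JobId=12345 Dependency=afterok:12345_0
#   scontrol release 12345                      # release the rest; they wait on the dependency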
################################################################################

set -e # exit on the first command failure (from Joost's slurm_for_ml)

# Note: Remember to clear ~/.theano cache before running this script

echo "hostname: $(hostname)"
echo "Running from: $(pwd)"
echo "GPU available: $(nvidia-smi)"
module load gcc/6.2.0 cuda/9.0
export THEANO_FLAGS='floatX=float32,device=cuda,force_device=True' # Otherwise Theano only raises a warning and falls back to the CPU

# To generate this file from a directory, do e.g. '(cd ALIGNMENTS_DIR && ls -1 *.a2m) > msa.txt'
mapfile -t lines < msa.txt # v5 benchmark
DATASET_ID=$(($SLURM_ARRAY_TASK_ID % 100)) # Group a run of datasets together
seed_id=$(($SLURM_ARRAY_TASK_ID / 100))
seeds=(1 2 3 4 5) # Theano won't accept seed 0 for some reason
SEED=${seeds[$seed_id]}
echo "DATASET_ID: $DATASET_ID, seed: $SEED"

dataset_name=${lines[$DATASET_ID]}
echo "dataset name: $dataset_name"

export WEIGHTS_DIR=weights_msa_tkmer_20220227
export ALIGNMENTS_DIR=msa_tkmer_20220227

## Monitor GPU usage (store outputs in ./gpu_logs/)
#/home/lov701/job_gpu_monitor.sh --interval 1m gpu_logs &

srun stdbuf -oL -eL /n/groups/marks/users/aaron/deep_seqs/deep_seqs_env/bin/python \
    /n/groups/marks/users/lood/DeepSequence_runs/run_svi.py \
    --dataset "$dataset_name" \
    --weights_dir "$WEIGHTS_DIR" \
    --alignments_dir "$ALIGNMENTS_DIR" \
    --seed "$SEED"
# --theta-override 0.9