diff --git a/jobs/dpo/dpo_cppo_multi_gpu.sh b/jobs/dpo/dpo_cppo_multi_gpu.sh index 52e0f094..d427e72e 100644 --- a/jobs/dpo/dpo_cppo_multi_gpu.sh +++ b/jobs/dpo/dpo_cppo_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -34,4 +34,4 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-${dataset_name}" \ --no_remove_unused_columns \ --wandb_project $dataset_name \ - --wandb_run_name "Qwen2-0.5B-DPO-${dataset_name}-multi-gpu" \ No newline at end of file + --wandb_run_name "Qwen2-0.5B-DPO-${dataset_name}-multi-gpu" diff --git a/jobs/dpo/dpo_domain_shift_multi_gpu.sh b/jobs/dpo/dpo_domain_shift_multi_gpu.sh index 0286b46c..c95af9e9 100644 --- a/jobs/dpo/dpo_domain_shift_multi_gpu.sh +++ b/jobs/dpo/dpo_domain_shift_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -20,7 +20,7 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero benchmarks/dpo/dpo_continual.py \ --dataset_name $dataset_name \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ - --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ --learning_rate 5.0e-6 \ --num_train_epochs 4 \ --per_device_train_batch_size 8 \ diff --git a/jobs/dpo/dpo_lipschitz_multi_gpu.sh 
b/jobs/dpo/dpo_lipschitz_multi_gpu.sh index 2815641c..9f07cf8f 100644 --- a/jobs/dpo/dpo_lipschitz_multi_gpu.sh +++ b/jobs/dpo/dpo_lipschitz_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -20,7 +20,7 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero benchmarks/dpo/dpo_continual.py \ --dataset_name $dataset_name \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ - --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ --learning_rate 5.0e-6 \ --num_train_epochs 4 \ --per_device_train_batch_size 8 \ diff --git a/jobs/dpo/dpo_long_piecewise_multi_gpu.sh b/jobs/dpo/dpo_long_piecewise_multi_gpu.sh index f964e68a..7c63fa6e 100644 --- a/jobs/dpo/dpo_long_piecewise_multi_gpu.sh +++ b/jobs/dpo/dpo_long_piecewise_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -20,7 +20,7 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero benchmarks/dpo/dpo_continual.py \ --dataset_name $dataset_name \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ - --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ --learning_rate 5.0e-6 \ --num_train_epochs 4 \ --per_device_train_batch_size 8 \ 
diff --git a/jobs/dpo/dpo_piecewise_multi_gpu.sh b/jobs/dpo/dpo_piecewise_multi_gpu.sh index a74a2bb9..4e3f1b4e 100644 --- a/jobs/dpo/dpo_piecewise_multi_gpu.sh +++ b/jobs/dpo/dpo_piecewise_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -20,7 +20,7 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero benchmarks/dpo/dpo_continual.py \ --dataset_name $dataset_name \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ - --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ --learning_rate 5.0e-6 \ --num_train_epochs 4 \ --per_device_train_batch_size 8 \ diff --git a/jobs/dpo/dpo_short_piecewise_multi_gpu.sh b/jobs/dpo/dpo_short_piecewise_multi_gpu.sh index 9abe3bc4..542cb4a6 100644 --- a/jobs/dpo/dpo_short_piecewise_multi_gpu.sh +++ b/jobs/dpo/dpo_short_piecewise_multi_gpu.sh @@ -7,7 +7,7 @@ #SBATCH --mem=64G #SBATCH --time=24:00:00 #SBATCH --output=out/%x.%j.out # Include job name + job ID -#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID #SBATCH --mail-type=ALL #SBATCH --account=aip-rrabba #SBATCH --mail-user=shahrad_m@icloud.com # Update with your email @@ -20,7 +20,7 @@ accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero benchmarks/dpo/dpo_continual.py \ --dataset_name $dataset_name \ --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ - --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ --learning_rate 5.0e-6 \ 
--num_train_epochs 4 \ --per_device_train_batch_size 8 \ diff --git a/jobs/dpo_ewc/dpo_ewc_cppo_multi_gpu.sh b/jobs/dpo_ewc/dpo_ewc_cppo_multi_gpu.sh new file mode 100644 index 00000000..dd4a2262 --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_cppo_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-cppo +#SBATCH --nodes=1 # Request 2 nodes +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='CPPO-RL' + +accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name 'CPPO-RL' \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_CPPO_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + --eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu" diff --git a/jobs/dpo_ewc/dpo_ewc_domain_shift_multi_gpu.sh b/jobs/dpo_ewc/dpo_ewc_domain_shift_multi_gpu.sh new file mode 100644 index 00000000..2cfcb1a9 --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_domain_shift_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-domain_shift +#SBATCH --nodes=1 # Request 2 nodes +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # 
One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='aifgen-domain-preference-shift' + +accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name $dataset_name \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + --eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu" diff --git a/jobs/dpo_ewc/dpo_ewc_lipschitz_multi_gpu.sh b/jobs/dpo_ewc/dpo_ewc_lipschitz_multi_gpu.sh new file mode 100644 index 00000000..7cda0f99 --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_lipschitz_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-lipschitz +#SBATCH --nodes=1 # Request 2 nodes +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='aifgen-lipschitz' + +accelerate launch --config_file 
benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name $dataset_name \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + --eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu" diff --git a/jobs/dpo_ewc/dpo_ewc_long_piecewise_multi_gpu.sh b/jobs/dpo_ewc/dpo_ewc_long_piecewise_multi_gpu.sh new file mode 100644 index 00000000..39ae8eea --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_long_piecewise_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-long_piecewise +#SBATCH --nodes=1 # Request 2 nodes +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='aifgen-long-piecewise' + +accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name $dataset_name \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + 
--eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu" diff --git a/jobs/dpo_ewc/dpo_ewc_piecewise_multi_gpu.sh b/jobs/dpo_ewc/dpo_ewc_piecewise_multi_gpu.sh new file mode 100644 index 00000000..7f477e12 --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_piecewise_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-piecewise-preference-shift +#SBATCH --nodes=1 # Request 2 nodes +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='aifgen-piecewise-preference-shift' + +accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name $dataset_name \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + --eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu" diff --git a/jobs/dpo_ewc/dpo_ewc_short_piecewise_multi_gpu.sh 
b/jobs/dpo_ewc/dpo_ewc_short_piecewise_multi_gpu.sh new file mode 100644 index 00000000..9c88d6e3 --- /dev/null +++ b/jobs/dpo_ewc/dpo_ewc_short_piecewise_multi_gpu.sh @@ -0,0 +1,37 @@ +#!/bin/bash +#SBATCH --job-name=aif-gen-dpo-ewc-short-piecewise +#SBATCH --nodes=1 # Request 1 node +#SBATCH --gpus-per-node=h100:4 # Request 4 H100 GPUs per node +#SBATCH --ntasks-per-node=4 # One task per GPU +#SBATCH --cpus-per-task=6 +#SBATCH --mem=64G +#SBATCH --time=24:00:00 +#SBATCH --output=out/%x.%j.out # Include job name + job ID +#SBATCH --error=out/%x.%j.err # Include job name + job ID +#SBATCH --mail-type=ALL +#SBATCH --account=aip-rrabba +#SBATCH --mail-user=shahrad_m@icloud.com # Update with your email + +source .env + +dataset_name='aifgen-short-piecewise' + +accelerate launch --config_file benchmarks/dpo/accelerate_configs/deepspeed_zero2.yaml \ + benchmarks/dpo_ewc/dpo_EWC_continual.py \ + --dataset_name $dataset_name \ + --model_name_or_path Qwen/Qwen2-0.5B-Instruct \ + --reward_model_path LifelongAlignment/Qwen2.5-0.5B-Instruct_${dataset_name}_REWARD \ + --learning_rate 5.0e-6 \ + --num_train_epochs 4 \ + --per_device_train_batch_size 8 \ + --gradient_checkpointing \ + --logging_steps 20 \ + --eval_strategy steps \ + --response_length 256 \ + --eval_steps 500 \ + --save_steps 500 \ + --bf16 \ + --output_dir "$SCRATCH/projects/Qwen2-0.5B-DPO-EWC-${dataset_name}" \ + --no_remove_unused_columns \ + --wandb_project $dataset_name \ + --wandb_run_name "Qwen2-0.5B-DPO-EWC-${dataset_name}-multi-gpu"