# Provision the EC2 instance
terraform -chdir="terraform" apply
# SSH into VM
ssh -i "llm.pem" ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
git clone https://github.com/AdarshNandanwar/Language-Model-Math-Solver-via-Code-Generation.git hw6
cd /home/ubuntu/hw6
conda env create -f environment.yml
conda activate llmhw6

# Alternative manual setup: configure the environment and start the Jupyter server
export LC_ALL=en_US.utf-8
conda create --name llmhw6 python=3.10
# If you hit an error like `UnavailableInvalidChannel: HTTP 403 FORBIDDEN for channel <some channel>` on your EC2 instance, remove the offending channel and restore the default channel:
conda config --remove channels https://aws-ml-conda-ec2.s3.us-west-2.amazonaws.com
conda config --add channels defaults
conda init bash
exit
# Restart shell
conda activate llmhw6
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install --upgrade pip
pip3 install -r ~/hw6/requirements.txt --use-pep517
pip install -qqq flash-attn

# Start the Jupyter server
jupyter notebook --no-browser
# http://localhost:2222/tree?token=894eb5eeee4d8d4142d79773fbcedbfaef1e8eabb3a9cdcf
# In another window, tunnel local port 2222 to the Jupyter server on remote port 8888
ssh -i llm.pem -L 2222:localhost:8888 ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com

# On the VM: build the cleaned dataset, then run each model's prompt-generation/inference/evaluation/metrics pipeline
python generate_clean_dataset.py

# Example pipeline (Gemma):
# python generate_prompts.py --model_id google/gemma-2-2b-it
# rm -r generations/google/gemma-2-2b-it
# python inference.py --model_id google/gemma-2-2b-it --max_samples 932 --use_vllm 1
# python evaluate.py --model_id google/gemma-2-2b-it
# python metrics.py --model_id google/gemma-2-2b-it
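When `--use_vllm 1` is set, `inference.py` presumably batches generation through vLLM. Purely as a minimal sketch of that pattern (the prompt contents, file layout, and sampling parameters below are assumptions, not the repo's actual code):

```python
# Hypothetical sketch of vLLM batch generation; the model id matches the
# commands here, but the prompt and sampling values are illustrative only.
from vllm import LLM, SamplingParams

# In the repo, prompts would be loaded from the prompts/ directory.
prompts = ["Write a Python program that solves: Natalia sold 48 clips in April..."]

llm = LLM(model="meta-llama/Llama-3.2-3B-Instruct")
params = SamplingParams(temperature=0.0, max_tokens=512)  # assumed settings

for output in llm.generate(prompts, params):
    # Each generated program would then be saved under generations/<model_id>/
    print(output.outputs[0].text)
```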
# Llama pipeline
python generate_prompts.py --model_id meta-llama/Llama-3.2-3B-Instruct
rm -r generations/meta-llama/Llama-3.2-3B-Instruct
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct
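Since the solver works via code generation, `evaluate.py` plausibly executes each generated program and checks its printed answer. A hedged sketch of that pattern (file names, answer format, and tolerance are assumptions):

```python
# Hypothetical evaluation pattern: run one generated solution in a
# subprocess with a timeout and compare its printed output to the gold answer.
import subprocess

def run_generated_code(path: str, timeout: float = 10.0) -> str | None:
    try:
        result = subprocess.run(
            ["python", path], capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return None  # treat hangs as failures
    return result.stdout.strip() if result.returncode == 0 else None

def is_correct(predicted: str | None, gold: str, tol: float = 1e-6) -> bool:
    if predicted is None:
        return False
    try:
        return abs(float(predicted) - float(gold)) < tol
    except ValueError:
        return predicted == gold  # fall back to exact string match

print(is_correct(run_generated_code("generations/sample_0.py"), "72"))
```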
# Qwen pipeline
python generate_prompts.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
rm -r generations/Qwen/Qwen2.5-Coder-7B-Instruct
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct

Note: for finetuning, we used the prompt1 verbalizer.
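The prompt1 verbalizer itself is defined in the repo (in `generate_prompts.py`); purely to illustrate the idea, a hypothetical template that verbalizes a word problem into a code-generation prompt might look like this (the exact wording is an assumption):

```python
# Hypothetical stand-in for the repo's prompt1 verbalizer; the actual
# template lives in generate_prompts.py.
PROMPT1_TEMPLATE = (
    "Solve the following math problem by writing a Python program.\n"
    "The program must print only the final numeric answer.\n\n"
    "Problem: {question}\n\n"
    "Python solution:\n"
)

def verbalize(question: str) -> str:
    return PROMPT1_TEMPLATE.format(question=question)
```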
# Fill in your Hugging Face and Weights & Biases tokens
export HUGGINGFACE_TOKEN=""
export WANDB_API_KEY=""
# Llama Hyperparameter tuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
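The swept hyperparameters live in `configs/config_1.json` through `configs/config_3.json`. As a hedged illustration only (none of these keys or values are taken from the repo), such a config might hold LoRA and optimizer settings:

```python
# Hypothetical shape of a configs/config_*.json file; keys and values are
# assumptions for illustration, not the repo's actual configuration.
import json

example_config = {
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "learning_rate": 2e-4,
    "num_train_epochs": 1,
    "gradient_accumulation_steps": 8,
}
print(json.dumps(example_config, indent=2))
```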
# Llama finetuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 2 --config_file configs/config_llama.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama --max_samples 932 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
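Under the hood, `finetune.py` most likely wraps the base model with a PEFT/LoRA adapter before training. A minimal sketch of that pattern with `transformers` + `peft` (the target modules and hyperparameters are assumptions, not the repo's values):

```python
# Hypothetical LoRA setup; hyperparameters are illustrative, the real ones
# come from the configs/config_*.json files.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # assumed target modules
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights are trainable
```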
# Qwen Hyperparameter tuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
# Qwen finetuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 2 --config_file configs/config_qwen.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen --max_samples 932 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen

Set up the Hugging Face accelerate configuration and start training using the following commands.
accelerate config
# A copy of the config file is included at the repo root as "accelerate_config.yaml"
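For context, `accelerate launch` expects the training script to follow the standard Accelerate pattern; a generic, self-contained skeleton of that loop (not the repo's actual `finetune.py` internals) looks like:

```python
# Generic Hugging Face Accelerate training-loop skeleton with a dummy model;
# finetune.py's real loop may differ.
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(8, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
data = torch.utils.data.DataLoader(torch.randn(32, 8), batch_size=4)

# prepare() moves everything to the right device(s) and wraps for multi-GPU
model, optimizer, data = accelerator.prepare(model, optimizer, data)

for batch in data:
    optimizer.zero_grad()
    loss = model(batch).pow(2).mean()  # stand-in for the LM loss
    accelerator.backward(loss)         # replaces loss.backward()
    optimizer.step()
```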
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 8 --config_file configs/config_llama.json
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 4 --config_file configs/config_qwen.json

# Copy the results back to the local machine
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/prompts .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/generations .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/evaluation .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/metrics .

# Tear down the EC2 instance
terraform -chdir="terraform" destroy