Language-Model-Math-Solver-via-Code-Generation

Setup

terraform -chdir="terraform" apply
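
If the Terraform configuration exposes the instance's public DNS as an output (an assumption about this repo's Terraform files, not something they are confirmed to define), you can read it back instead of copying it from the AWS console:

# Hypothetical: lists Terraform outputs, if any are defined
terraform -chdir="terraform" output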

# SSH into VM
ssh -i "llm.pem" ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
git clone https://github.com/AdarshNandanwar/Language-Model-Math-Solver-via-Code-Generation.git hw6
cd /home/ubuntu/hw6

Option 1 (Conda environment.yml)

conda env create -f environment.yml
conda activate llmhw6

Option 2 (Manual setup using pip)

# Set up the conda environment
export LC_ALL=en_US.utf-8
conda create --name llmhw6 python=3.10
# If conda fails with an error like `UnavailableInvalidChannel: HTTP 403 FORBIDDEN for channel <some channel>`
# on the EC2 instance, remove the offending channel and restore the defaults channel with the two commands below.
conda config --remove channels https://aws-ml-conda-ec2.s3.us-west-2.amazonaws.com
conda config --add channels defaults
conda init bash
exit
# Restart shell
conda activate llmhw6
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install --upgrade pip
pip install -r ~/hw6/requirements.txt --use-pep517
pip install -qqq flash-attn

Setup Jupyter Notebook

jupyter notebook --no-browser
# Note the login token Jupyter prints (it serves on port 8888 on the VM). Once the tunnel
# below is up, open the server locally through the forwarded port, e.g.
# http://localhost:2222/tree?token=894eb5eeee4d8d4142d79773fbcedbfaef1e8eabb3a9cdcf

# In another terminal on your local machine, forward local port 2222 to the VM's port 8888
ssh -i llm.pem -L 2222:localhost:8888 ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
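
Optionally (a workflow convenience, not something the repo requires), run the notebook server detached so it survives SSH disconnects:

# Start Jupyter in the background and log its output, including the tokenized URL
nohup jupyter notebook --no-browser > jupyter.log 2>&1 &
# The login URL (port 8888 on the VM) can be recovered from the log later
tail -n 20 jupyter.log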

Execution

Dataset

python generate_clean_dataset.py

In-context

# Optional: the same pipeline for google/gemma-2-2b-it (left commented out)
# python generate_prompts.py --model_id google/gemma-2-2b-it
# rm -r generations/google/gemma-2-2b-it
# python inference.py --model_id google/gemma-2-2b-it --max_samples 932 --use_vllm 1
# python evaluate.py --model_id google/gemma-2-2b-it
# python metrics.py --model_id google/gemma-2-2b-it

python generate_prompts.py --model_id meta-llama/Llama-3.2-3B-Instruct
rm -r generations/meta-llama/Llama-3.2-3B-Instruct
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct

python generate_prompts.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
rm -r generations/Qwen/Qwen2.5-Coder-7B-Instruct
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
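
The four stages above always run in the same order (prompts, inference, evaluation, metrics), so the per-model blocks can also be written as a small loop. This is only a convenience sketch reusing the exact scripts and flags shown above; the MODELS list is an example.

# Run the full in-context pipeline for each model id (same flags as above)
MODELS="meta-llama/Llama-3.2-3B-Instruct Qwen/Qwen2.5-Coder-7B-Instruct"
for MODEL_ID in $MODELS; do
  python generate_prompts.py --model_id "$MODEL_ID"
  rm -rf "generations/$MODEL_ID"   # -f: ignore a missing directory on the first run
  python inference.py --model_id "$MODEL_ID" --max_samples 932 --use_vllm 1
  python evaluate.py --model_id "$MODEL_ID"
  python metrics.py --model_id "$MODEL_ID"
done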

PEFT

Note: for finetuning, we used the prompt1 verbalizer.

export HUGGINGFACE_TOKEN=""  # fill in your Hugging Face access token
export WANDB_API_KEY=""      # fill in your Weights & Biases API key


# Llama Hyperparameter tuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2

python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
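
The three sweep blocks above differ only in the config file, so they can equivalently be run as a loop (a sketch reusing the same scripts and flags; the finetuned model id follows the <base>-math-solver-<config> naming used throughout this section).

BASE=meta-llama/Llama-3.2-3B-Instruct
for CFG in config_1 config_2 config_3; do
  python finetune.py --model_id "$BASE" --max_train_samples 1000 --batch_size 2 --config_file "configs/${CFG}.json"
  python inference.py --model_id "$BASE" --ft_model_id "${BASE}-math-solver-${CFG}" --max_samples 100 --use_vllm 0
  python evaluate.py --model_id "${BASE}-math-solver-${CFG}"
  python metrics.py --model_id "$BASE" --ft_model_id "${BASE}-math-solver-${CFG}"
done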

# Llama finetuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 2 --config_file configs/config_llama.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama --max_samples 932 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama


# Qwen Hyperparameter tuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2

python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3

# Qwen finetuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 2 --config_file configs/config_qwen.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen --max_samples 932 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen

Distributed finetuning

Set up the Hugging Face Accelerate configuration and start training with the following commands.

accelerate config
# A copy of the resulting config file is kept in the repo root as "accelerate_config.yaml"
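
For reference, a single-node multi-GPU Accelerate config typically looks like the sketch below. The values (GPU count, precision, file name) are illustrative assumptions, not the contents of the repo's accelerate_config.yaml; adjust them to your instance or simply answer the `accelerate config` prompts.

# Sketch of a typical multi-GPU config (illustrative values only)
cat > my_accelerate_config.yaml <<'EOF'
compute_environment: LOCAL_MACHINE
distributed_type: MULTI_GPU
mixed_precision: bf16
num_machines: 1
num_processes: 4        # one process per GPU
gpu_ids: all
machine_rank: 0
main_training_function: main
use_cpu: false
EOF
# Pass a config explicitly via --config_file, as in the launch commands below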

accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 8 --config_file configs/config_llama.json
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 4 --config_file configs/config_qwen.json

Download outputs

scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/prompts .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/generations .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/evaluation .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/metrics .

Teardown

terraform -chdir="terraform" destroy
