# Provision the EC2 instance
terraform -chdir="terraform" apply
# SSH into VM
ssh -i "llm.pem" ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com
git clone https://github.com/AdarshNandanwar/Language-Model-Math-Solver-via-Code-Generation.git hw6
cd /home/ubuntu/hw6
conda env create -f environment.yml
conda activate llmhw6

# Alternative manual setup: configure the environment and start the Jupyter server
export LC_ALL=en_US.utf-8
conda create --name llmhw6 python=3.10
# If you hit an error like `UnavailableInvalidChannel: HTTP 403 FORBIDDEN for channel <some channel>` on your EC2 instance, remove the offending channel and restore the default channel:
conda config --remove channels https://aws-ml-conda-ec2.s3.us-west-2.amazonaws.com
conda config --add channels defaults
conda init bash
exit
# Restart shell
conda activate llmhw6
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
pip install --upgrade pip
pip3 install -r ~/hw6/requirements.txt --use-pep517
pip install -qqq flash-attn

# Start the Jupyter server
jupyter notebook --no-browser
# http://localhost:2222/tree?token=894eb5eeee4d8d4142d79773fbcedbfaef1e8eabb3a9cdcf
# In another window, tunnel local port 2222 to the Jupyter server on remote port 8888
ssh -i llm.pem -L 2222:localhost:8888 ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com

# On the VM: build the cleaned dataset, then run each model's prompt-generation/inference/evaluation/metrics pipeline
python generate_clean_dataset.py

# Example pipeline (Gemma):
# python generate_prompts.py --model_id google/gemma-2-2b-it
# rm -r generations/google/gemma-2-2b-it
# python inference.py --model_id google/gemma-2-2b-it --max_samples 932 --use_vllm 1
# python evaluate.py --model_id google/gemma-2-2b-it
# python metrics.py --model_id google/gemma-2-2b-it
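When `--use_vllm 1` is set, `inference.py` presumably batches generation through vLLM. Purely as a minimal sketch of that pattern (the prompt contents, file layout, and sampling parameters below are assumptions, not the repo's actual code):

```python
# Hypothetical sketch of vLLM batch generation; the model id matches the
# commands here, but the prompt and sampling values are illustrative only.
from vllm import LLM, SamplingParams

# In the repo, prompts would be loaded from the prompts/ directory.
prompts = ["Write a Python program that solves: Natalia sold 48 clips in April..."]

llm = LLM(model="meta-llama/Llama-3.2-3B-Instruct")
params = SamplingParams(temperature=0.0, max_tokens=512)  # assumed settings

for output in llm.generate(prompts, params):
    # Each generated program would then be saved under generations/<model_id>/
    print(output.outputs[0].text)
```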
# Llama pipeline
python generate_prompts.py --model_id meta-llama/Llama-3.2-3B-Instruct
rm -r generations/meta-llama/Llama-3.2-3B-Instruct
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct
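Since the solver works via code generation, `evaluate.py` plausibly executes each generated program and checks its printed answer. A hedged sketch of that pattern (file names, answer format, and tolerance are assumptions):

```python
# Hypothetical evaluation pattern: run one generated solution in a
# subprocess with a timeout and compare its printed output to the gold answer.
import subprocess

def run_generated_code(path: str, timeout: float = 10.0) -> str | None:
    try:
        result = subprocess.run(
            ["python", path], capture_output=True, text=True, timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return None  # treat hangs as failures
    return result.stdout.strip() if result.returncode == 0 else None

def is_correct(predicted: str | None, gold: str, tol: float = 1e-6) -> bool:
    if predicted is None:
        return False
    try:
        return abs(float(predicted) - float(gold)) < tol
    except ValueError:
        return predicted == gold  # fall back to exact string match

print(is_correct(run_generated_code("generations/sample_0.py"), "72"))
```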
# Qwen pipeline
python generate_prompts.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
rm -r generations/Qwen/Qwen2.5-Coder-7B-Instruct
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_samples 932 --use_vllm 1
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct

Note: for finetuning, we used the prompt1 verbalizer.
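The prompt1 verbalizer itself is defined in the repo (in `generate_prompts.py`); purely to illustrate the idea, a hypothetical template that verbalizes a word problem into a code-generation prompt might look like this (the exact wording is an assumption):

```python
# Hypothetical stand-in for the repo's prompt1 verbalizer; the actual
# template lives in generate_prompts.py.
PROMPT1_TEMPLATE = (
    "Solve the following math problem by writing a Python program.\n"
    "The program must print only the final numeric answer.\n\n"
    "Problem: {question}\n\n"
    "Python solution:\n"
)

def verbalize(question: str) -> str:
    return PROMPT1_TEMPLATE.format(question=question)
```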
# Fill in your Hugging Face and Weights & Biases tokens
export HUGGINGFACE_TOKEN=""
export WANDB_API_KEY=""
# Llama Hyperparameter tuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_1
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_2
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_3
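The swept hyperparameters live in `configs/config_1.json` through `configs/config_3.json`. As a hedged illustration only (none of these keys or values are taken from the repo), such a config might hold LoRA and optimizer settings:

```python
# Hypothetical shape of a configs/config_*.json file; keys and values are
# assumptions for illustration, not the repo's actual configuration.
import json

example_config = {
    "lora_r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "learning_rate": 2e-4,
    "num_train_epochs": 1,
    "gradient_accumulation_steps": 8,
}
print(json.dumps(example_config, indent=2))
```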
# Llama finetuning
python finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 2 --config_file configs/config_llama.json
python inference.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama --max_samples 932 --use_vllm 0
python evaluate.py --model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
python metrics.py --model_id meta-llama/Llama-3.2-3B-Instruct --ft_model_id meta-llama/Llama-3.2-3B-Instruct-math-solver-config_llama
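Under the hood, `finetune.py` most likely wraps the base model with a PEFT/LoRA adapter before training. A minimal sketch of that pattern with `transformers` + `peft` (the target modules and hyperparameters are assumptions, not the repo's values):

```python
# Hypothetical LoRA setup; hyperparameters are illustrative, the real ones
# come from the configs/config_*.json files.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model

model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # assumed target modules
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights are trainable
```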
# Qwen Hyperparameter tuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_1.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_1
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_2.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_2
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --max_train_samples 1000 --batch_size 2 --config_file configs/config_3.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3 --max_samples 100 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_3
# Qwen finetuning
python finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 2 --config_file configs/config_qwen.json
python inference.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen --max_samples 932 --use_vllm 0
python evaluate.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen
python metrics.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --ft_model_id Qwen/Qwen2.5-Coder-7B-Instruct-math-solver-config_qwen

Set up the Hugging Face accelerate configuration and start training using the following commands.
accelerate config
# A copy of the config file is included at the repo root as "accelerate_config.yaml"
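For context, `accelerate launch` expects the training script to follow the standard Accelerate pattern; a generic, self-contained skeleton of that loop (not the repo's actual `finetune.py` internals) looks like:

```python
# Generic Hugging Face Accelerate training-loop skeleton with a dummy model;
# finetune.py's real loop may differ.
import torch
from accelerate import Accelerator

accelerator = Accelerator()
model = torch.nn.Linear(8, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
data = torch.utils.data.DataLoader(torch.randn(32, 8), batch_size=4)

# prepare() moves everything to the right device(s) and wraps for multi-GPU
model, optimizer, data = accelerator.prepare(model, optimizer, data)

for batch in data:
    optimizer.zero_grad()
    loss = model(batch).pow(2).mean()  # stand-in for the LM loss
    accelerator.backward(loss)         # replaces loss.backward()
    optimizer.step()
```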
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id meta-llama/Llama-3.2-3B-Instruct --batch_size 8 --config_file configs/config_llama.json
accelerate launch --config_file /home/ubuntu/.cache/huggingface/accelerate/default_config.yaml finetune.py --model_id Qwen/Qwen2.5-Coder-7B-Instruct --batch_size 4 --config_file configs/config_qwen.json

# Copy the results back to the local machine
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/prompts .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/generations .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/evaluation .
scp -i llm.pem -r ubuntu@ec2-98-84-134-186.compute-1.amazonaws.com:~/hw6/metrics .

# Tear down the EC2 instance
terraform -chdir="terraform" destroy