# eval.py
# Compare the base, LoRA-tuned, and fully fine-tuned Qwen2.5-Coder checkpoints
# on a handful of LeetCode problems and dump all generated solutions to JSON.
import json
import re

import torch
from tqdm import tqdm
from unsloth import FastLanguageModel
from datasets import load_dataset

# The base model is pulled from the Hugging Face Hub; the two checkpoint paths
# are local artifacts produced by the LoRA and full fine-tuning runs.
base_model_name = "unsloth/Qwen2.5-Coder-0.5B-bnb-4bit"
lora_model_path = "./models/leetcode_lora/checkpoint-498"
full_ft_model_path = "./models/leetcode_full_finetune/checkpoint-168"

device = "cuda" if torch.cuda.is_available() else "cpu"
def extract_code(text):
    """Pull the assistant turn out of a decoded ChatML transcript."""
    pattern = r"<\|im_start\|>assistant\n(.*?)(?:<\|im_end\|>|$)"
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else text
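
# Quick sanity check for extract_code (illustrative string, not from a real
# run): only the assistant turn should survive, with special tokens stripped.
_sample = "<|im_start|>assistant\nprint('hi')<|im_end|>"
assert extract_code(_sample) == "print('hi')"
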
def run_eval(model, tokenizer, dataset, tag):
    """Generate one solution per problem and collect the extracted code."""
    results = []
    for entry in tqdm(dataset, desc=f"Evaluating {tag}"):
        instruction = entry["problem_description"]
        prompt = (
            "<|im_start|>system\nYou are a competitive programmer.<|im_end|>\n"
            f"<|im_start|>user\n{instruction}<|im_end|>\n"
            "<|im_start|>assistant\n"
        )
        inputs = tokenizer([prompt], return_tensors="pt").to(device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            pad_token_id=tokenizer.eos_token_id,
        )
        decoded = tokenizer.batch_decode(outputs)[0]
        generated_code = extract_code(decoded)
        results.append({
            "problem": instruction[:200] + "...",  # truncated preview only
            "generated_code": generated_code,
        })
    return results
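
# Note (assumption): generate() follows the checkpoint's saved generation
# config, so decoding may be sampled rather than greedy; pass do_sample=False
# explicitly in run_eval() if deterministic comparisons are needed.
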
# A small smoke-test set: the first 5 problems of the train split. Note that
# the fine-tuned checkpoints may have seen these exact problems in training.
eval_dataset = load_dataset("newfacade/LeetCodeDataset", split="train").select(range(5))

# The tokenizer returned by the first load is shared across all three models.
base_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_name,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(base_model)

lora_model, _ = FastLanguageModel.from_pretrained(
    model_name=lora_model_path,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(lora_model)

full_ft_model, _ = FastLanguageModel.from_pretrained(
    model_name=full_ft_model_path,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(full_ft_model)
# Run the same prompts through each model.
base_results = run_eval(base_model, tokenizer, eval_dataset, "BASE_MODEL")
lora_results = run_eval(lora_model, tokenizer, eval_dataset, "LORA_TUNED_MODEL")
full_ft_results = run_eval(full_ft_model, tokenizer, eval_dataset, "FULL_FINETUNED_MODEL")
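
# Optional (sketch, if VRAM is tight): the three 4-bit models are kept resident
# here for simplicity; each could instead be deleted once its results are
# collected, e.g. `del base_model` followed by `torch.cuda.empty_cache()`.
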
output_file = "eval_results_all_models.json"
final_data = {
    "total_samples": len(eval_dataset),
    "models": {
        "base_model": base_model_name,
        "lora_model_path": lora_model_path,
        "full_ft_model_path": full_ft_model_path,
    },
    "results": {
        "base_model_outputs": base_results,
        "lora_model_outputs": lora_results,
        "full_finetuned_outputs": full_ft_results,
    },
}

with open(output_file, "w", encoding="utf-8") as f:
    json.dump(final_data, f, indent=4)
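
# Optional follow-up: reload the saved file and print the start of the first
# generated solution per model (key names match `final_data` above).
with open(output_file, encoding="utf-8") as f:
    saved = json.load(f)
for model_tag, model_outputs in saved["results"].items():
    print(f"--- {model_tag} ---")
    print(model_outputs[0]["generated_code"][:300])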