diff --git a/tests/models/test_bloom_bias_torch_fused.py b/tests/models/test_bloom_bias_torch_fused.py
index a453ace5c..ebae699db 100644
--- a/tests/models/test_bloom_bias_torch_fused.py
+++ b/tests/models/test_bloom_bias_torch_fused.py
@@ -35,9 +35,11 @@ def test_with_torch_fused_cpu(self, backend):
             backend=BACKEND.TORCH_FUSED,
             device=DEVICE.CPU,
         )
-        generate_str = tokenizer.decode(
-            model.generate(**tokenizer("The capital city of France is named", return_tensors="pt").to(model.device),
-                           max_new_tokens=512)[0])
+        generate_str = self.generate_with_limit(
+            model,
+            tokenizer,
+            "The capital city of France is named",
+        )
         print(f"generate_str: {generate_str}")
diff --git a/tests/models/test_llama3_2_torch_fused.py b/tests/models/test_llama3_2_torch_fused.py
index 0f6ad35c0..51a6ac005 100644
--- a/tests/models/test_llama3_2_torch_fused.py
+++ b/tests/models/test_llama3_2_torch_fused.py
@@ -24,9 +24,11 @@ def test_with_torch_fused_cpu(self, backend):
             device=DEVICE.CPU,
         )
         tokenizer = model.tokenizer
-        generate_str = tokenizer.decode(
-            model.generate(**tokenizer("The capital of France is is", return_tensors="pt").to(model.device),
-                           max_new_tokens=512)[0])
+        generate_str = self.generate_with_limit(
+            model,
+            tokenizer,
+            "The capital of France is is",
+        )
         print(f"generate_str: {generate_str}")
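
For context on the refactor: both hunks replace the same inline tokenize/generate/decode pattern with a call to self.generate_with_limit. Below is a minimal sketch of what that helper presumably does, inferred from the removed inline lines; the real implementation lives in the test base class and is not part of this diff, so the exact signature and the 512-token default are assumptions.

# Hypothetical reconstruction of the shared helper the hunks call; in the
# repo it is presumably a method on the test base class. The signature and
# the max_new_tokens default are inferred from the removed inline code, not
# taken from the actual implementation.
def generate_with_limit(model, tokenizer, prompt, max_new_tokens=512):
    # Tokenize the prompt and move the input tensors to the model's device,
    # as the removed inline code did.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Generate up to max_new_tokens tokens and decode the first returned
    # sequence back into a string.
    return tokenizer.decode(model.generate(**inputs, max_new_tokens=max_new_tokens)[0])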