From 0bf3f4720afb005fbd8f2626983a118e5e66938d Mon Sep 17 00:00:00 2001 From: Qubitium Date: Fri, 13 Mar 2026 08:49:52 +0000 Subject: [PATCH] use deterministic do_sample=False for some tests --- tests/models/test_bloom_bias_torch_fused.py | 8 +++++--- tests/models/test_llama3_2_torch_fused.py | 8 +++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/models/test_bloom_bias_torch_fused.py b/tests/models/test_bloom_bias_torch_fused.py index 50feab038..0f19d4dd8 100644 --- a/tests/models/test_bloom_bias_torch_fused.py +++ b/tests/models/test_bloom_bias_torch_fused.py @@ -35,9 +35,11 @@ def test_with_torch_fused_cpu(self, backend): backend=BACKEND.TORCH_FUSED, device=DEVICE.CPU, ) - generate_str = tokenizer.decode( - model.generate(**tokenizer("The capital of France is is", return_tensors="pt").to(model.device), - max_new_tokens=512)[0]) + generate_str = self.generate_with_limit( + model, + tokenizer, + "The capital of France is is", + ) print(f"generate_str: {generate_str}") diff --git a/tests/models/test_llama3_2_torch_fused.py b/tests/models/test_llama3_2_torch_fused.py index 0f6ad35c0..51a6ac005 100644 --- a/tests/models/test_llama3_2_torch_fused.py +++ b/tests/models/test_llama3_2_torch_fused.py @@ -24,9 +24,11 @@ def test_with_torch_fused_cpu(self, backend): device=DEVICE.CPU, ) tokenizer = model.tokenizer - generate_str = tokenizer.decode( - model.generate(**tokenizer("The capital of France is is", return_tensors="pt").to(model.device), - max_new_tokens=512)[0]) + generate_str = self.generate_with_limit( + model, + tokenizer, + "The capital of France is is", + ) print(f"generate_str: {generate_str}")