From 0bf3f4720afb005fbd8f2626983a118e5e66938d Mon Sep 17 00:00:00 2001
From: Qubitium <qubitium@modelcloud.ai>
Date: Fri, 13 Mar 2026 08:49:52 +0000
Subject: [PATCH] Use deterministic generation (do_sample=False) in torch-fused CPU tests

---
 tests/models/test_bloom_bias_torch_fused.py | 8 +++++---
 tests/models/test_llama3_2_torch_fused.py   | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/tests/models/test_bloom_bias_torch_fused.py b/tests/models/test_bloom_bias_torch_fused.py
index 50feab038..0f19d4dd8 100644
--- a/tests/models/test_bloom_bias_torch_fused.py
+++ b/tests/models/test_bloom_bias_torch_fused.py
@@ -35,9 +35,11 @@ def test_with_torch_fused_cpu(self, backend):
                 backend=BACKEND.TORCH_FUSED,
                 device=DEVICE.CPU,
             )
-            generate_str = tokenizer.decode(
-                model.generate(**tokenizer("The capital of France is is", return_tensors="pt").to(model.device),
-                               max_new_tokens=512)[0])
+            generate_str = self.generate_with_limit(
+                model,
+                tokenizer,
+                "The capital of France is is",
+            )
 
             print(f"generate_str: {generate_str}")
 
diff --git a/tests/models/test_llama3_2_torch_fused.py b/tests/models/test_llama3_2_torch_fused.py
index 0f6ad35c0..51a6ac005 100644
--- a/tests/models/test_llama3_2_torch_fused.py
+++ b/tests/models/test_llama3_2_torch_fused.py
@@ -24,9 +24,11 @@ def test_with_torch_fused_cpu(self, backend):
             device=DEVICE.CPU,
         )
         tokenizer = model.tokenizer
-        generate_str = tokenizer.decode(
-            model.generate(**tokenizer("The capital of France is is", return_tensors="pt").to(model.device),
-                           max_new_tokens=512)[0])
+        generate_str = self.generate_with_limit(
+            model,
+            tokenizer,
+            "The capital of France is is",
+        )
 
         print(f"generate_str: {generate_str}")