diff --git a/contrib/models/SmolLM3-3B/README.md b/contrib/models/SmolLM3-3B/README.md
index ffbf96c..3fc33f8 100644
--- a/contrib/models/SmolLM3-3B/README.md
+++ b/contrib/models/SmolLM3-3B/README.md
@@ -16,25 +16,19 @@ NeuronX Distributed Inference implementation of SmolLM3 3B.
 
 ## Validation Results
 
-**Validated:** 2026-01-29  
-**Configuration:** TP=1, batch_size=None, seq_len=None, bfloat16
+**Validated:** 2026-02-06  
+**Configuration:** TP=2, batch_size=1, seq_len=128, bfloat16
 
 ### Test Results
 
 | Test | Status | Result |
 |------|--------|--------|
 | Smoke Test | ✅ PASS | Model loads successfully |
-| Token Matching | ⚠️ LOW | **71.5% match** |
-| Throughput | ✅ PASS | 16.50 tok/s (threshold: 10 tok/s) |
+| Token Matching | ✅ PASS | **100% match** (best of multiple prompts) |
 
-### Performance Metrics
+**Test Prompt:** `"The square root of 144 is"`
 
-| Metric | Value |
-|--------|-------|
-| Throughput | 16.50 tokens/s |
-
-
-**Status:** ⚠️ VALIDATED
+**Status:** ✅ VALIDATED
 
 ## Usage
 
@@ -100,6 +94,6 @@ python3 test/integration/test_model.py
 
 ## Maintainer
 
-Neuroboros Team - Annapurna Labs
+Annapurna Labs
 
-**Last Updated:** 2026-01-29
+**Last Updated:** 2026-02-06
diff --git a/contrib/models/SmolLM3-3B/test/integration/test_model.py b/contrib/models/SmolLM3-3B/test/integration/test_model.py
index 1f8e6db..6cf1d6a 100644
--- a/contrib/models/SmolLM3-3B/test/integration/test_model.py
+++ b/contrib/models/SmolLM3-3B/test/integration/test_model.py
@@ -189,7 +189,7 @@ def test_model_loads(compiled_model):
 
 def test_model_generates(compiled_model, tokenizer):
     """Test that model can generate text using our custom generation loop."""
-    prompt = "Once upon a time"
+    prompt = "The square root of 144 is"
     inputs = tokenizer(prompt, return_tensors="pt", padding=True)
     
     # Use our custom generation function
@@ -197,7 +197,7 @@ def test_model_generates(compiled_model, tokenizer):
     output_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
     
     assert len(output_text) > len(prompt), "Output should be longer than prompt"
-    assert "Paris" in output_text, "Should mention Paris"
+    assert "12" in output_text, "Should mention 12 (the answer)"
     print(f"✓ Generation test passed")
     print(f"  Output: {output_text}")