diff --git a/contrib/models/SmolLM3-3B/README.md b/contrib/models/SmolLM3-3B/README.md index ffbf96c..3fc33f8 100644 --- a/contrib/models/SmolLM3-3B/README.md +++ b/contrib/models/SmolLM3-3B/README.md @@ -16,25 +16,19 @@ NeuronX Distributed Inference implementation of SmolLM3 3B. ## Validation Results -**Validated:** 2026-01-29 -**Configuration:** TP=1, batch_size=None, seq_len=None, bfloat16 +**Validated:** 2026-02-06 +**Configuration:** TP=2, batch_size=1, seq_len=128, bfloat16 ### Test Results | Test | Status | Result | |------|--------|--------| | Smoke Test | ✅ PASS | Model loads successfully | -| Token Matching | ⚠️ LOW | **71.5% match** | -| Throughput | ✅ PASS | 16.50 tok/s (threshold: 10 tok/s) | +| Token Matching | ✅ PASS | **100% match** (best of multiple prompts) | -### Performance Metrics +**Test Prompt:** `"The square root of 144 is"` -| Metric | Value | -|--------|-------| -| Throughput | 16.50 tokens/s | - - -**Status:** ⚠️ VALIDATED +**Status:** ✅ VALIDATED ## Usage @@ -100,6 +94,6 @@ python3 test/integration/test_model.py ## Maintainer -Neuroboros Team - Annapurna Labs +Annapurna Labs -**Last Updated:** 2026-01-29 +**Last Updated:** 2026-02-06 diff --git a/contrib/models/SmolLM3-3B/test/integration/test_model.py b/contrib/models/SmolLM3-3B/test/integration/test_model.py index 1f8e6db..6cf1d6a 100644 --- a/contrib/models/SmolLM3-3B/test/integration/test_model.py +++ b/contrib/models/SmolLM3-3B/test/integration/test_model.py @@ -189,7 +189,7 @@ def test_model_loads(compiled_model): def test_model_generates(compiled_model, tokenizer): """Test that model can generate text using our custom generation loop.""" - prompt = "Once upon a time" + prompt = "The square root of 144 is" inputs = tokenizer(prompt, return_tensors="pt", padding=True) # Use our custom generation function @@ -197,7 +197,7 @@ def test_model_generates(compiled_model, tokenizer): output_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) assert len(output_text) > len(prompt), "Output should be longer than prompt" - assert "Paris" in output_text, "Should mention Paris" + assert "12" in output_text, "Should mention 12 (the answer)" print(f"✓ Generation test passed") print(f" Output: {output_text}")