ModelCloud · Qubitium · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026 · Mar 12, 2026
diff --git a/defuser/model_registry.py b/defuser/model_registry.py
@@ -4,6 +4,9 @@
 # Contact: qubitium@modelcloud.ai, x.com/qubitium
 
 MODEL_CONFIG = {
+    "mixtral": {
+        "min_transformers_version": "5.0.0",
+    },
     "qwen3_moe": {
         "min_transformers_version": "5.0.0",
     },

diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "Defuser"
-version = "0.0.7"
+version = "0.0.8"
 description = "Model defuser helper for HF Transformers."
 readme = "README.md"
 requires-python = ">=3.9"

diff --git a/tests/test_convert_model.py b/tests/test_convert_model.py
@@ -71,3 +71,45 @@ def test_qwen3_5_moe():
     torch.testing.assert_close(expert0.gate_proj.weight, expected_gate)
     torch.testing.assert_close(expert0.up_proj.weight, expected_up)
     torch.testing.assert_close(expert0.down_proj.weight, expected_down)
+
+
+def test_mixtral():  # checks that convert_model exposes Mixtral expert 0 as separate gate/up/down projections whose weights match the fused tensors
+    from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
+
+    model_path = "/monster/data/model/Mixtral-8x7B-Instruct-v0.1" # machine-local path; presumably a copy of hub model "mistralai/Mixtral-8x7B-Instruct-v0.1" — confirm
+    config = AutoConfig.from_pretrained(model_path)
+    config.num_hidden_layers = 1  # load the model with a single hidden layer only
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        config=config,  # the overridden one-layer config from above
+        ignore_mismatched_sizes=True,
+    )
+    assert model.config.model_type == "mixtral"  # guard: the registry entry under test is keyed "mixtral"
+
+    original_moe_block = model.model.layers[0].mlp  # MoE block of the only layer, before conversion
+    assert isinstance(original_moe_block, MixtralSparseMoeBlock)
+
+    hidden_dim = original_moe_block.experts.gate_up_proj.shape[-1]  # last dim of the fused gate/up tensor
+    intermediate_dim = original_moe_block.experts.gate_up_proj.shape[1] // 2  # dim 1 is split in half below: gate first, up second
+
+    expected_gate = original_moe_block.experts.gate_up_proj[0, :intermediate_dim, :hidden_dim].contiguous().clone()  # expert 0, first half of dim 1
+    expected_up = original_moe_block.experts.gate_up_proj[0, intermediate_dim:, :hidden_dim].contiguous().clone()  # expert 0, second half of dim 1
+    expected_down = original_moe_block.experts.down_proj[0, :hidden_dim, :intermediate_dim].contiguous().clone()  # expert 0 slice of down_proj; cloned so it is independent of the model's storage
+
+    converted = convert_model(model, cleanup_original=False, max_layers=1)  # expected to modify `model` itself: the layer is re-read from it below
+    assert converted  # the return value must be truthy
+
+    moe_block = model.model.layers[0].mlp  # same layer, looked up again after conversion
+    experts = moe_block.experts
+
+    assert hasattr(experts, "0")  # expert 0 must be reachable under the attribute name "0"
+    expert0 = getattr(experts, "0")
+    assert hasattr(expert0, "gate_proj")
+    assert hasattr(expert0, "up_proj")
+    assert hasattr(expert0, "down_proj")
+
+    materialize_model(model.model.layers[0])  # NOTE(review): presumably realizes the converted weights before they are compared — confirm against materialize_model
+
+    torch.testing.assert_close(expert0.gate_proj.weight, expected_gate)  # compare against the slices captured before conversion
+    torch.testing.assert_close(expert0.up_proj.weight, expected_up)
+    torch.testing.assert_close(expert0.down_proj.weight, expected_down)