From f893c67d701d8bf79999ec1da480d4b57333c333 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 12 Mar 2026 20:06:54 +0800
Subject: [PATCH 1/3] supports mixtral

Signed-off-by: ZX-ModelCloud
---
 defuser/model_registry.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/defuser/model_registry.py b/defuser/model_registry.py
index 1e7a33d..31be712 100644
--- a/defuser/model_registry.py
+++ b/defuser/model_registry.py
@@ -4,6 +4,9 @@
 # Contact: qubitium@modelcloud.ai, x.com/qubitium

 MODEL_CONFIG = {
+    "mixtral": {
+        "min_transformers_version": "5.0.0",
+    },
     "qwen3_moe": {
         "min_transformers_version": "5.0.0",
     },

From 2b5e868b1cfb873e26732f9c54d0c2866a94a062 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 12 Mar 2026 20:07:28 +0800
Subject: [PATCH 2/3] Bump version from 0.0.7 to 0.0.8

Signed-off-by: ZX-ModelCloud
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8c0dcd2..1f45e93 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "Defuser"
-version = "0.0.7"
+version = "0.0.8"
 description = "Model defuser helper for HF Transformers."
 readme = "README.md"
 requires-python = ">=3.9"

From 20161eb9954e607a40d8d2f0fac640ae56c7bb37 Mon Sep 17 00:00:00 2001
From: ZX-ModelCloud
Date: Thu, 12 Mar 2026 20:11:51 +0800
Subject: [PATCH 3/3] add test_mixtral()

Signed-off-by: ZX-ModelCloud
---
 tests/test_convert_model.py | 42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/test_convert_model.py b/tests/test_convert_model.py
index 718d5f6..c03f90b 100644
--- a/tests/test_convert_model.py
+++ b/tests/test_convert_model.py
@@ -71,3 +71,45 @@ def test_qwen3_5_moe():
     torch.testing.assert_close(expert0.gate_proj.weight, expected_gate)
     torch.testing.assert_close(expert0.up_proj.weight, expected_up)
     torch.testing.assert_close(expert0.down_proj.weight, expected_down)
+
+
+def test_mixtral():
+    from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock
+
+    model_path = "/monster/data/model/Mixtral-8x7B-Instruct-v0.1"  # "mistralai/Mixtral-8x7B-Instruct-v0.1"
+    config = AutoConfig.from_pretrained(model_path)
+    config.num_hidden_layers = 1
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        config=config,
+        ignore_mismatched_sizes=True,
+    )
+    assert model.config.model_type == "mixtral"
+
+    original_moe_block = model.model.layers[0].mlp
+    assert isinstance(original_moe_block, MixtralSparseMoeBlock)
+
+    hidden_dim = original_moe_block.experts.gate_up_proj.shape[-1]
+    intermediate_dim = original_moe_block.experts.gate_up_proj.shape[1] // 2
+
+    expected_gate = original_moe_block.experts.gate_up_proj[0, :intermediate_dim, :hidden_dim].contiguous().clone()
+    expected_up = original_moe_block.experts.gate_up_proj[0, intermediate_dim:, :hidden_dim].contiguous().clone()
+    expected_down = original_moe_block.experts.down_proj[0, :hidden_dim, :intermediate_dim].contiguous().clone()
+
+    converted = convert_model(model, cleanup_original=False, max_layers=1)
+    assert converted
+
+    moe_block = model.model.layers[0].mlp
+    experts = moe_block.experts
+
+    assert hasattr(experts, "0")
+    expert0 = getattr(experts, "0")
+    assert hasattr(expert0, "gate_proj")
+    assert hasattr(expert0, "up_proj")
+    assert hasattr(expert0, "down_proj")
+
+    materialize_model(model.model.layers[0])
+
+    torch.testing.assert_close(expert0.gate_proj.weight, expected_gate)
+    torch.testing.assert_close(expert0.up_proj.weight, expected_up)
+    torch.testing.assert_close(expert0.down_proj.weight, expected_down)
\ No newline at end of file
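
For context, a minimal usage sketch of the conversion flow that test_mixtral() exercises, restricted to the calls visible in this patch series. The import location of convert_model and materialize_model is an assumption (the diff does not show the test file's import block), and the keyword arguments simply mirror the test rather than a documented public API.

    from transformers import AutoConfig, AutoModelForCausalLM

    # Assumed import path; the patch only shows these helpers already in scope
    # inside tests/test_convert_model.py.
    from defuser import convert_model, materialize_model

    model_path = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    config = AutoConfig.from_pretrained(model_path)
    config.num_hidden_layers = 1  # shrink to one layer for a quick smoke test, as the test does
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        config=config,
        ignore_mismatched_sizes=True,
    )

    # Split the fused MoE expert tensors into per-expert gate/up/down projections,
    # keeping the original weights around for comparison.
    convert_model(model, cleanup_original=False, max_layers=1)

    # Materialize the converted layer's weights before reading them.
    materialize_model(model.model.layers[0])
    expert0 = getattr(model.model.layers[0].mlp.experts, "0")
    print(expert0.gate_proj.weight.shape)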