From 0223e0127193c3047cab993f8dafcc546341aa51 Mon Sep 17 00:00:00 2001
From: zxw <1020938856@qq.com>
Date: Thu, 23 Oct 2025 10:30:10 +0800
Subject: [PATCH] update qwen3-235b-a22b-fp8

---
Review notes (free-form text in this section is not part of the commit):
  * Adds spec.config.maxTokens (40960) to the qwen3-235b-a22b-fp8 model spec.
  * Replaces the empty customRuntimeArgs list with two reasoning flags.
  * Changes resourceRequirements.gpuType from nvidia-vgpu to vgpu.
  * NOTE(review): the leading whitespace of the hunk lines below was
    reconstructed (2-space nesting, flush block sequences) -- confirm it
    matches metadata.yaml at blob 509b881 before applying.
  * NOTE(review): presumably the runtime accepts the underscore spelling of
    the two flags; verify against the deployed vllm version.

 models/alibaba/qwen3-235b-a22b-fp8/metadata.yaml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/models/alibaba/qwen3-235b-a22b-fp8/metadata.yaml b/models/alibaba/qwen3-235b-a22b-fp8/metadata.yaml
index 509b881..7f67a3e 100644
--- a/models/alibaba/qwen3-235b-a22b-fp8/metadata.yaml
+++ b/models/alibaba/qwen3-235b-a22b-fp8/metadata.yaml
@@ -3,12 +3,16 @@ kind: ModelSpec
 metadata:
   name: qwen3-235b-a22b-fp8
 spec:
+  config:
+    maxTokens: 40960
   deployments:
-  - customRuntimeArgs: []
+  - customRuntimeArgs:
+    - --enable_reasoning
+    - --reasoning_parser=deepseek_r1
     resourceRequirements:
       cpu: 16
       gpuCount: 8
-      gpuType: nvidia-vgpu
+      gpuType: vgpu
       memory: 640
       perGPUMemoryGB: 80
     runtime: vllm