From 74a5e4e44cbb721fa1a646abe46a806bd96c4b9b Mon Sep 17 00:00:00 2001 From: ibpark-moreh Date: Wed, 4 Feb 2026 19:30:42 +0900 Subject: [PATCH] MAF-19213: Enhance Helm templates for DeepSeek, Llama, and Qwen models - Added model organization, model name, role, accelerator vendor, accelerator model, and parallelism labels to DeepSeek-R1 decode and prefill templates. - Updated quickstart templates for DeepSeek-R1, Llama 3.2, and Qwen3 models to include the same labels for consistency. - Introduced role labels (plus parallelism labels for the dp and pp variants) in runtime base templates for e2e, decode, and prefill configurations, ensuring better resource management and clarity across deployments. --- .../vllm-deepseek-r1-decode-mi300x-dp8ep.helm.yaml | 6 ++++++ .../vllm-deepseek-r1-prefill-mi300x-dp8ep.helm.yaml | 6 ++++++ ...epseek-ai-deepseek-r1-decode-amd-mi300x-dp8ep8.helm.yaml | 6 ++++++ ...pseek-ai-deepseek-r1-prefill-amd-mi300x-dp8ep8.helm.yaml | 6 ++++++ ...meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml | 6 ++++++ ...eta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml | 6 ++++++ ...ama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml | 6 ++++++ ...ma-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml | 6 ++++++ ...ma-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml | 6 ++++++ ...a-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml | 6 ++++++ .../quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml | 6 ++++++ ...quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml | 6 ++++++ ...tart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml | 6 ++++++ ...art-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml | 6 ++++++ ...art-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml | 6 ++++++ ...rt-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml | 6 ++++++ .../templates/runtime-bases/vllm-decode-dp.helm.yaml | 2 ++ .../templates/runtime-bases/vllm-decode-pp.helm.yaml | 2 ++ .../templates/runtime-bases/vllm-decode.helm.yaml | 1 + .../templates/runtime-bases/vllm-dp.helm.yaml | 2 ++ 
.../templates/runtime-bases/vllm-pp.helm.yaml | 2 ++ .../templates/runtime-bases/vllm-prefill-dp.helm.yaml | 2 ++ .../templates/runtime-bases/vllm-prefill-pp.helm.yaml | 2 ++ .../templates/runtime-bases/vllm-prefill.helm.yaml | 1 + .../templates/runtime-bases/vllm.helm.yaml | 1 + 25 files changed, 111 insertions(+) diff --git a/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-decode-mi300x-dp8ep.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-decode-mi300x-dp8ep.helm.yaml index b5d63c2..8310cb6 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-decode-mi300x-dp8ep.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-decode-mi300x-dp8ep.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: deepseek-ai + mif.moreh.io/model.name: deepseek-r1 + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: dp8ep8 spec: parallelism: data: 8 diff --git a/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-prefill-mi300x-dp8ep.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-prefill-mi300x-dp8ep.helm.yaml index f321d08..2ba8d3d 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-prefill-mi300x-dp8ep.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/deepseek-r1/vllm-deepseek-r1-prefill-mi300x-dp8ep.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: deepseek-ai + mif.moreh.io/model.name: deepseek-r1 + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: dp8ep8 spec: parallelism: data: 8 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-decode-amd-mi300x-dp8ep8.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-decode-amd-mi300x-dp8ep8.helm.yaml index fa82ded..ddb5d9e 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-decode-amd-mi300x-dp8ep8.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-decode-amd-mi300x-dp8ep8.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: deepseek-ai + mif.moreh.io/model.name: deepseek-r1 + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: dp8ep8 spec: parallelism: data: 8 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-prefill-amd-mi300x-dp8ep8.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-prefill-amd-mi300x-dp8ep8.helm.yaml index 556fc3d..db17433 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-prefill-amd-mi300x-dp8ep8.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-deepseek-ai-deepseek-r1-prefill-amd-mi300x-dp8ep8.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: deepseek-ai + mif.moreh.io/model.name: deepseek-r1 + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: dp8ep8 spec: parallelism: data: 8 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml index fe38209..9c8f3aa 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: e2e + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml index 561ae37..2232afe 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: e2e + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml index 7f12bb6..8bdbd57 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml index 33b51c8..ba66c2f 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-decode-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . 
}} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml index 4f34298..1e868a8 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml index 946cc38..5d1b950 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-meta-llama-llama-3.2-1b-instruct-prefill-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: meta-llama + mif.moreh.io/model.name: llama-3.2-1b-instruct + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml index 49a6b79..4394ace 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: e2e + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml index 9cc538e..b9a4681 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: e2e + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml index 2fa2518..72c8af1 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml index 037bbfa..0d1a706 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-decode-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: decode + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml index 8b3ccb4..b9b14d6 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi250-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . 
| nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi250 + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml index 936ed2f..c9ff80d 100644 --- a/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/presets/quickstart/quickstart-vllm-qwen-qwen3-1.7b-prefill-amd-mi300x-tp2.helm.yaml @@ -5,6 +5,12 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.preset.labels" . | nindent 4 }} + mif.moreh.io/model.org: qwen + mif.moreh.io/model.name: qwen3-1.7b + mif.moreh.io/role: prefill + mif.moreh.io/accelerator.vendor: amd + mif.moreh.io/accelerator.model: mi300x + mif.moreh.io/parallelism: tp2 spec: parallelism: tensor: 2 diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-dp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-dp.helm.yaml index 1bca467..8c6275b 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-dp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-dp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . 
| nindent 4 }} + mif.moreh.io/role: decode + mif.moreh.io/parallelism: dp spec: workerTemplate: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-pp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-pp.helm.yaml index 9f8107d..a08a9b8 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-pp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode-pp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: decode + mif.moreh.io/parallelism: pp spec: template: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode.helm.yaml index 94b1db1..553e4d0 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-decode.helm.yaml @@ -5,6 +5,7 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: decode spec: template: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-dp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-dp.helm.yaml index 5ee02bd..e34bb12 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-dp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-dp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . 
| nindent 4 }} + mif.moreh.io/role: e2e + mif.moreh.io/parallelism: dp spec: workerTemplate: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-pp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-pp.helm.yaml index 7928208..9219342 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-pp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-pp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: e2e + mif.moreh.io/parallelism: pp spec: template: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-dp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-dp.helm.yaml index 838596f..82e1935 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-dp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-dp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: prefill + mif.moreh.io/parallelism: dp spec: workerTemplate: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-pp.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-pp.helm.yaml index 08cb6f2..7c36d42 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-pp.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill-pp.helm.yaml @@ -5,6 +5,8 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . 
| nindent 4 }} + mif.moreh.io/role: prefill + mif.moreh.io/parallelism: pp spec: template: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill.helm.yaml index 578136c..ed0feb3 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm-prefill.helm.yaml @@ -5,6 +5,7 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: prefill spec: template: metadata: diff --git a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm.helm.yaml b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm.helm.yaml index c6521ce..5be6118 100644 --- a/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm.helm.yaml +++ b/deploy/helm/moai-inference-preset/templates/runtime-bases/vllm.helm.yaml @@ -5,6 +5,7 @@ metadata: namespace: {{ include "common.names.namespace" . }} labels: {{- include "mif.runtimeBase.labels" . | nindent 4 }} + mif.moreh.io/role: e2e spec: template: spec: