diff --git a/examples/geo3k_vlm/README.md b/examples/geo3k_vlm/README.md
index 800cca542..b6dbfa43c 100644
--- a/examples/geo3k_vlm/README.md
+++ b/examples/geo3k_vlm/README.md
@@ -2,6 +2,10 @@
Training VLMs with FSDP or Megatron on single-turn reasoning task using GRPO on the [GEO3K dataset](https://huggingface.co/datasets/hiyouga/geometry3k). We used processed version [here](https://huggingface.co/datasets/chenhegu/geo3k_imgurl).
+Supported models:
+* Qwen2.5-VL
+* Qwen3-VL (Dense and MoE)
+
diff --git a/examples/geo3k_vlm/run_geo3k_vlm.sh b/examples/geo3k_vlm/run_geo3k_vlm.sh
index 95b1d1f36..0185214be 100644
--- a/examples/geo3k_vlm/run_geo3k_vlm.sh
+++ b/examples/geo3k_vlm/run_geo3k_vlm.sh
@@ -15,6 +15,10 @@ DATASET_LOCAL_NAME=$(basename "$DATASET_NAME")
# Validate MODEL_NAME
VALID_MODELS="
+ Qwen2.5-VL-3B-Instruct
+ Qwen2.5-VL-7B-Instruct
+ Qwen2.5-VL-32B-Instruct
+ Qwen2.5-VL-72B-Instruct
Qwen3-VL-2B-Instruct
Qwen3-VL-4B-Instruct
Qwen3-VL-8B-Instruct
@@ -80,7 +84,7 @@ fi
# Common args
CKPT_ARGS=(
--hf-checkpoint /root/models/${MODEL_NAME}
- # vl model has rotary base 5000000
+ # Qwen3-VL uses rotary base 5000000. NOTE(review): this flag is passed for every model, incl. Qwen2.5-VL -- confirm it matches that model's rope_theta
--rotary-base 5000000
)
@@ -187,7 +191,7 @@ else
# get MODEL_ARGS from scripts/models for megatron backend
SLIME_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." &>/dev/null && pwd)"
- MODEL_ARGS_FILE=$(echo "$MODEL_NAME" | sed 's/-Instruct//g; s/-Thinking//g; s/Qwen3-VL-/qwen3-/g; s/-2B/-1.7B/g')
+ MODEL_ARGS_FILE=$(echo "$MODEL_NAME" | sed 's/-Instruct//g; s/-Thinking//g; s/Qwen2.5-VL-/qwen2.5-/g; s/Qwen3-VL-/qwen3-/g; s/-2B/-1.7B/g')
source "${SLIME_DIR}/scripts/models/${MODEL_ARGS_FILE}.sh"
fi
diff --git a/examples/geo3k_vlm/run_geo3k_vlm_sft.sh b/examples/geo3k_vlm/run_geo3k_vlm_sft.sh
index 94eb02f3e..35bc96c99 100644
--- a/examples/geo3k_vlm/run_geo3k_vlm_sft.sh
+++ b/examples/geo3k_vlm/run_geo3k_vlm_sft.sh
@@ -6,6 +6,10 @@ DATASET_LOCAL_NAME=$(basename "$DATASET_NAME")
# Validate MODEL_NAME
VALID_MODELS="
+ Qwen2.5-VL-3B-Instruct
+ Qwen2.5-VL-7B-Instruct
+ Qwen2.5-VL-32B-Instruct
+ Qwen2.5-VL-72B-Instruct
Qwen3-VL-2B-Instruct
Qwen3-VL-4B-Instruct
Qwen3-VL-8B-Instruct
@@ -151,7 +155,7 @@ else
# get MODEL_ARGS from scripts/models for megatron backend
SLIME_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." &>/dev/null && pwd)"
- MODEL_ARGS_FILE=$(echo "$MODEL_NAME" | sed 's/-Instruct//g; s/-Thinking//g; s/Qwen3-VL-/qwen3-/g; s/-2B/-1.7B/g')
+ MODEL_ARGS_FILE=$(echo "$MODEL_NAME" | sed 's/-Instruct//g; s/-Thinking//g; s/Qwen2.5-VL-/qwen2.5-/g; s/Qwen3-VL-/qwen3-/g; s/-2B/-1.7B/g')
source "${SLIME_DIR}/scripts/models/${MODEL_ARGS_FILE}.sh"
fi