# Clone with submodules
# NOTE(review): SSH clone URL assumes a configured GitHub SSH key; use the
# https:// URL instead if SSH access is not set up.
git clone git@github.com:Xiaoming-AMD/Training-Benchmark.git
cd Training-Benchmark
# Pull all nested submodules (e.g. third_party/Primus) required by the scripts.
git submodule update --init --recursive
# Set up pre-commit hooks (optional, for local linting/formatting)
pip install pre-commit
pre-commit install

- All MI355-related scripts are under scripts/MI355, and run_pretrain_mi355x.sh is the unified entry point.
- All training log files are stored under the path: third_party/Primus/output/date-$(date +%Y%m%d).
Note: Before running the following scripts, please make sure to set the environment variables provided by AMD. These are required to pull the `tas` Docker images and to access Hugging Face tokenizers (replace with your own token if needed):

export HF_TOKEN="your_hf_token"
export DOCKER_LOGIN_USER="login_user"
export DOCKER_LOGIN_KEY="login_key"
Run the predefined Qwen3 235B A22B performance sweep:
bash scripts/MI355/perf_test_qwen3_235b_a22b.sh

This script:
- Exports common perf flags (MANUAL_GC, NUMA_BINDING, ENABLE_SYNC_FREE_MOE, ENABLE_TURBO_DEEPEP)
- Iterates over a list of (NNODES, MBS, GBS, PP, EP, PIPELINE_LAYOUT, TRAIN_ITERS) configs
- Calls run_pretrain_mi355x.sh for each config
bash scripts/MI355/perf_test_qwen3_30b_a3b.sh

This script:
- Sets MODEL_NAME=qwen3_30B_A3B and enables NUMA_BINDING, ENABLE_SYNC_FREE_MOE, ENABLE_TURBO_DEEPEP
- Sweeps (NNODES, MBS, GBS, TP, PP, EP, CP, RECOMPUTE_LAYERS, TRAIN_ITERS, LOG_AVG_SKIP_ITERATIONS)
- Calls run_pretrain_mi355x.sh for each config
bash scripts/MI355/perf_test_qwen3_8b.sh

This script:
- Sets MODEL_NAME=qwen3_8B
- Sweeps (NNODES, MBS, GBS, TRAIN_ITERS)
- Calls run_pretrain_mi355x.sh for each config
bash scripts/MI355/perf_test_llama31_8b.sh

This script:
- Sets MODEL_NAME=llama3.1_8B
- Sweeps (NNODES, MBS, GBS, TRAIN_ITERS)
- Calls run_pretrain_mi355x.sh for each config
bash scripts/MI355/perf_test_llama31_70b.sh

This script:
- Sets MODEL_NAME=llama3.1_70B or MODEL_NAME=llama3.1_70B-Zero
- Sweeps (MODEL_NAME, NNODES, MBS, GBS, TP, PP, VPP, RECOMPUTE_LAYERS, TRAIN_ITERS)
- Calls run_pretrain_mi355x.sh for each config