From b2035f456296b5a94b6561e038b542ff7057e7ff Mon Sep 17 00:00:00 2001
From: cklxx <1293822641@qq.com>
Date: Fri, 26 Dec 2025 14:38:14 +0800
Subject: [PATCH] Handle deepscaler answers without markers

---
 slime/rollout/rm_hub/deepscaler.py | 15 ++++++++++-----
 tests/test_math_reward_utils.py    |  6 ++++++
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_math_reward_utils.py

diff --git a/slime/rollout/rm_hub/deepscaler.py b/slime/rollout/rm_hub/deepscaler.py
index 39d4de383..38db16b3c 100644
--- a/slime/rollout/rm_hub/deepscaler.py
+++ b/slime/rollout/rm_hub/deepscaler.py
@@ -1,12 +1,17 @@
 from .math_utils import extract_answer, grade_answer_mathd, grade_answer_sympy
 
 
-def get_deepscaler_rule_based_reward(response, label):
+def _extract_solution_text(response: str) -> str | None:
     if "</think>" in response:
-        model_solution = response.split("</think>")[-1]
-    elif "###Response" in response:
-        model_solution = response.split("###Response")[1]
-    else:
+        return response.split("</think>")[-1]
+    if "###Response" in response:
+        return response.split("###Response")[1]
+    return response
+
+
+def get_deepscaler_rule_based_reward(response, label):
+    model_solution = _extract_solution_text(response)
+    if not model_solution:
         return 0
 
     model_answer = extract_answer(model_solution)
diff --git a/tests/test_math_reward_utils.py b/tests/test_math_reward_utils.py
new file mode 100644
index 000000000..d85e56a1e
--- /dev/null
+++ b/tests/test_math_reward_utils.py
@@ -0,0 +1,6 @@
+from slime.rollout.rm_hub.deepscaler import get_deepscaler_rule_based_reward
+
+
+def test_deepscaler_reward_fallback_without_markers():
+    response = "Answer: \\boxed{42}"
+    assert get_deepscaler_rule_based_reward(response, "42") == 1