From 2e795ad940950f3b06d40064f3b965b57e3237ca Mon Sep 17 00:00:00 2001
From: TimZF
Date: Sun, 26 Apr 2020 16:20:23 +0200
Subject: [PATCH] Temperature not working

With the current implementation the t parameter does not have an effect
on the probabilities: multiplying by 1 / t scales every entry by the same
constant, which cancels out in the normalisation. Other implementations
also use a power (for example https://github.com/werner-duvaud/muzero-general
or https://github.com/Zeta36/muzero).
---
 muzero/self_play/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/muzero/self_play/utils.py b/muzero/self_play/utils.py
index ec49217..52045b9 100644
--- a/muzero/self_play/utils.py
+++ b/muzero/self_play/utils.py
@@ -53,7 +53,7 @@ def value(self) -> Optional[float]:
 
 
 def softmax_sample(visit_counts, actions, t):
-    counts_exp = np.exp(visit_counts) * (1 / t)
+    counts_exp = np.exp(visit_counts) ** (1 / t)
     probs = counts_exp / np.sum(counts_exp, axis=0)
     action_idx = np.random.choice(len(actions), p=probs)
     return actions[action_idx]