forked from TDay1/tutorial11
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_comparison.py
More file actions
35 lines (28 loc) · 1.11 KB
/
plot_comparison.py
File metadata and controls
35 lines (28 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as np
import matplotlib.pyplot as plt
import os
# Per-episode reward histories saved by earlier training runs, one array per
# configuration being compared.
# NOTE(review): the array bound to rewards_e50k comes from a
# "hidden_size_256" file, unlike its "epsilon_decay_*" siblings -- presumably
# that run used a 50k epsilon decay; confirm this is the intended file.
rewards_e30k, rewards_e50k, rewards_e70k = (
    np.load(reward_file)
    for reward_file in (
        'plots/LunarLander-v3_rewards_LunarLander-v3_epsilon_decay_30000.npy',
        'plots/LunarLander-v3_rewards_LunarLander-v3_hidden_size_256.npy',
        'plots/LunarLander-v3_rewards_LunarLander-v3_epsilon_decay_70000.npy',
    )
)
def calculate_r100(rewards, window=100):
    """Return the trailing running mean of ``rewards``.

    Entry ``i`` of the result is the mean of the last ``window`` values up to
    and including index ``i``. For the first ``window - 1`` positions, where
    fewer values exist, the mean covers everything seen so far.

    Args:
        rewards: Sliceable sequence of numbers (e.g. a list or a 1-D NumPy
            array).
        window: Maximum number of trailing values averaged per entry.
            Defaults to 100, which reproduces the classic "R100" metric.

    Returns:
        A list with one mean per element of ``rewards`` (empty when
        ``rewards`` is empty).

    Raises:
        ValueError: If ``window`` is less than 1.
    """
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window}")
    # Clamping the start index at 0 covers the warm-up phase and the
    # full-window phase with a single slice expression.
    return [
        np.mean(rewards[max(0, i - window + 1):i + 1])
        for i in range(len(rewards))
    ]
# Compare the training runs on a single figure: the faint raw per-episode
# rewards underneath, and the running mean from calculate_r100 on top.
plt.figure(figsize=(12, 6))
for rewards, label in [(rewards_e30k, 'decay=30k'),
                       (rewards_e50k, 'decay=50k'),
                       (rewards_e70k, 'decay=70k')]:
    # Reuse the raw trace's colour for its smoothed curve so the two can be
    # matched by eye; separate plot() calls would otherwise each take the
    # next colour from the cycle.
    # Assumes each rewards array is 1-D, so plot() returns a single line.
    raw_line = plt.plot(rewards, alpha=0.3)[0]
    plt.plot(calculate_r100(rewards), label=label,
             color=raw_line.get_color())

# Draw the target line BEFORE building the legend: legend() only lists the
# labelled artists that already exist on the axes when it is called.
plt.axhline(y=200, color='r', linestyle='--', label='Target Reward (200)')

plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Epsilon Decay Comparison - LunarLander-v3')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('plots/epsilon_decay_comparison_LunarLander.pdf')
plt.close()