-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_best_run.m
47 lines (37 loc) · 1.99 KB
/
plot_best_run.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
% Setup: load the data for each algorithm variant and snapshot, per variant,
% the column index of the metric to plot and the run chosen as "best".
% Statement order matters here: each snapshot is taken right after the helper
% that provides its source variables.

% Close every open figure window before starting.
close all;
% Add the plotter/ folder to the MATLAB search path so the helpers below
% can be invoked by name.
addpath("plotter/");
% Not referenced again in this script; presumably read by the helper
% scripts invoked below -- TODO confirm before removing.
number_of_evals = 20;

% --- TRPO, standard ---
% NOTE(review): the helpers are invoked by bare name and appear to be scripts
% that populate this workspace (index variables, runN matrices, averages,
% iterations_vec) -- confirm against the files in plotter/.
load_avg_standard_trpo
% Used further down as the column subscript into the run/average matrices.
trpo_standard_index = trpo_train_mean_evaluation_reward;
trpo_standard_best_run = run1;

% --- TRPO, combined ---
plot_trpo_combined
trpo_combined_index = TRPO_train_mean_evaluation_reward;
trpo_combined_best_run = run18;

% The best runs (run1, run18, run12, run15) are hard-coded; they were
% selected manually.

% --- PPO, standard ---
load_avg_standard_ppo
ppo_standard_index = ppo_train_mean_evaluation_reward;
ppo_standard_best_run = run12;

% --- PPO, combined ---
plot_ppo_combined
ppo_combined_index = PPO_train_mean_evaluation_reward;
ppo_combined_best_run = run15;

% Close all figures again; presumably the plot_* helpers above open their
% own figures, which are not wanted here -- verify.
close all;
% Plot the best runs (solid lines) together with the averaged runs (dashed
% lines) in one figure. TRPO curves use blue tones, PPO curves use red/orange
% tones; each variant shares one colour between its best run and its average.
%
% Expects in the workspace: iterations_vec, the *_best_run and *_index
% variables, and the average matrices trpo_standad_avg,
% TRPO_Re3_OU_test_mean, PPO_standad_avg and PPO_Re3_OU_test_mean.

% One colour per algorithm variant, defined once so that the best-run and
% average curves of the same variant cannot drift apart.
colour_trpo_standard = [0 0.2470 0.9410];
colour_trpo_combined = [0.3010 0.7450 0.9330];
colour_ppo_standard  = [0.8350 0.0780 0.0840];
colour_ppo_combined  = [0.9290 0.6940 0.1250];

% One row per curve: y-data, line style, colour, legend label.
% Row order is the drawing order and therefore also the legend order.
plot_series = { ...
    trpo_standard_best_run(:, trpo_standard_index), '-',  colour_trpo_standard, "TRPO standard"; ...
    trpo_combined_best_run(:, trpo_combined_index), '-',  colour_trpo_combined, "TRPO Re3 OU test"; ...
    ppo_standard_best_run(:, ppo_standard_index),   '-',  colour_ppo_standard,  "PPO standard"; ...
    ppo_combined_best_run(:, ppo_combined_index),   '-',  colour_ppo_combined,  "PPO Re3 OU test"; ...
    trpo_standad_avg(:, trpo_standard_index),       '--', colour_trpo_standard, "TRPO standard avg"; ...
    TRPO_Re3_OU_test_mean(:, trpo_combined_index),  '--', colour_trpo_combined, "TRPO Re3 OU test avg"; ...
    PPO_standad_avg(:, ppo_standard_index),         '--', colour_ppo_standard,  "PPO standard avg"; ...
    PPO_Re3_OU_test_mean(:, ppo_combined_index),    '--', colour_ppo_combined,  "PPO Re3 OU test avg"; ...
};

figure(1);
hold on;
for series_idx = 1:size(plot_series, 1)
    plot(iterations_vec, plot_series{series_idx, 1}, ...
        'LineWidth', 1, ...
        'LineStyle', plot_series{series_idx, 2}, ...
        'Color', plot_series{series_idx, 3});
end

% Horizontal reference line at a reward of 300 (presumably the target
% reward -- confirm). It is drawn after the curves so that the legend
% labels below are matched to the curves only.
yline(300, '--', "300");

% Labels come from the same table rows as the data, in drawing order.
legend([plot_series{:, 4}], "Location", "southeast");
xlabel("Iterations");
ylabel("Mean evaluation reward");
title("Best and average runs for TRPO and PPO with and without the additions");
xticks(0:10:150);
hold off;