# coding: utf-8

# # Trading Framework
#
# This framework builds on Tito Ingargiola's work at https://github.com/hackthemarket/gym-trading.

# In[1]:
import gym
# In[2]:
import gym_trading
from gym_trading.envs.Q_learning import Q
#
# First, define the path to the CSV data
#
# In[3]:
# csv = "/home/adrian/Escritorio/polinex/LTCBTC.csv"
csv2 = "/home/adrian/Escritorio/polinex/EURUSD60.csv"
# # Create a new OpenAI Gym environment with the customised Trading environment
#
# `.initialise_simulator()` must be invoked after **gym.make('trading-v0')**. It takes these arguments:
#
# * **csv**: path to the CSV data
#
# * **ATR**: True/False, whether to use the ATR indicator (the only indicator available for now)
#
# * **trade_period**: (1 - 10), holding period for each trade. *Default: 1*
#
# * **train_split**: (0.0, 1.0), fraction of the data set used for training. *Default: 0.7*
# In[4]:
env = gym.make('trading-v0')
env.initialise_simulator(csv2, ATR=True, trade_period=5, train_split=0.7)
# # States map
#
# **states_map** is a discretized observation space bounded by the extreme values of *Return* and *ATR*, with an interval of 0.5. Every new *(Return, ATR)* observation pair is approximated by the closest pair in **states_map**. The position of a state in **states_map** is its row index in **lookup_table**.
# In[5]:
print(env.sim.states)
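# As an illustration of the discretization above (a hypothetical sketch, not
# the library's internal code; `snap_to_grid` and its arguments are made-up
# names), mapping an observation to the nearest (Return, ATR) grid point with
# a 0.5 interval could look like this:

def snap_to_grid(observation, interval=0.5):
    """Round each component of a (Return, ATR) pair to the nearest grid point."""
    ret, atr = observation
    return (round(ret / interval) * interval,
            round(atr / interval) * interval)

# e.g. snap_to_grid((0.37, 1.12)) returns (0.5, 1.0)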
# # Next, create the Q-learning framework
#
# This framework wraps the trading environment.
#
# Arguments:
#
# * **env**: gym_trading environment
#
# * **train_episodes**: number of training episodes used to update the Q-table
#
# * **learning_rate**: *Default: 0.2*
#
# * **gamma**: discount factor. *Default: 0.9*
#
# Upon initialization, Q_learning holds a zeroed Q-table (**lookup_table**) and the **states_map**. (A sketch of the update rule it applies follows the constructor call below.)
#
# In[6]:
Q_learning = Q(env, train_episodes=100, learning_rate=0.2, gamma=0.9)
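# For reference, this is the generic tabular Q-learning update such a framework
# applies, written as a standalone sketch (the names `q_update`, `state_idx`
# and `next_state_idx` are illustrative, not the library's API):

import numpy as np

def q_update(lookup_table, state_idx, action, reward, next_state_idx,
             learning_rate=0.2, gamma=0.9):
    """One Q-learning step: move Q(s, a) toward the TD target r + gamma * max_a' Q(s', a')."""
    td_target = reward + gamma * np.max(lookup_table[next_state_idx])
    td_error = td_target - lookup_table[state_idx, action]
    lookup_table[state_idx, action] += learning_rate * td_error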
# # States_map
#
# The same discretized **states_map** described above is also exposed on the Q_learning object.
#
# In[7]:
# print(Q_learning.states_map)
# # Q Table
#
# **lookup_table** has one row per entry in **states_map** and three columns, one per action (0, 1, 2).
# In[8]:
print(Q_learning.lookup_table[Q_learning.lookup_table != 0])
# All entries are zero for now: the table has not been trained yet
#
# # Training
#
# Fill up the Q-table
# In[9]:
Q_learning.train()
# After training, the Q-table is populated
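# During training, a tabular learner typically balances exploration and
# exploitation with an epsilon-greedy policy. Whether this framework does so
# is an assumption; a standard version looks like this:

import numpy as np

def epsilon_greedy(lookup_table, state_idx, epsilon=0.1, n_actions=3):
    """Pick a random action with probability epsilon, otherwise the greedy one."""
    if np.random.rand() < epsilon:
        return np.random.randint(n_actions)          # explore
    return int(np.argmax(lookup_table[state_idx]))   # exploit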
# # Testing
#
# Test the new Q-table on unseen data.
# * The Q-table is not updated in testing mode
# In[10]:
Q_learning.test(100)
# In[11]:
Q_learning._generate_summary_stats()
# All trade entries are kept in env.portfolio.journal
# In[12]:
print(env.portfolio.journal)
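# If the journal rows are record-like (an assumption about their structure),
# loading them into pandas makes the trade log easier to inspect:

import pandas as pd

journal_df = pd.DataFrame(env.portfolio.journal)  # pd.DataFrame accepts lists of dicts or tuples
print(journal_df.head())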