-
Notifications
You must be signed in to change notification settings - Fork 7
/
t-dlmodel.py
81 lines (64 loc) · 2.21 KB
/
t-dlmodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gym
import random
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
# Create the CartPole OpenAI Gym environment and define the constants shared
# by the data-collection and evaluation code in this file.
env = gym.make('CartPole-v1')
# Reset once up front; the value returned by reset() is discarded here.
env.reset()
# Upper bound on the number of steps taken in a single game.
goal_steps = 500
# Minimum total reward a random game must reach for its moves to be kept as
# training samples.
score_requirement = 60
# Number of random games played to collect training data.
# NOTE(review): "intial" is a misspelling of "initial"; the name is left
# unchanged because model_data_preparation() reads it.
intial_games = 10000
def model_data_preparation():
    """Collect training samples from randomly played games.

    Plays ``intial_games`` games on the module-level ``env``, choosing every
    action at random, for at most ``goal_steps`` steps each. Games whose total
    reward reaches ``score_requirement`` contribute their moves as samples.

    Returns:
        A list of ``[observation, one_hot_action]`` pairs, where
        ``one_hot_action`` is ``[1, 0]`` for action 0 and ``[0, 1]`` for
        action 1. The accepted game scores are printed before returning.
    """
    samples = []
    kept_scores = []

    for _game in range(intial_games):
        total_reward = 0
        transitions = []
        last_obs = []

        for _step in range(goal_steps):
            move = random.randrange(0, 2)
            current_obs, reward, done, _info = env.step(move)

            # A move is recorded against the observation seen before it, so
            # the very first step (no earlier observation yet) is not stored.
            if len(last_obs) > 0:
                transitions.append([last_obs, move])

            last_obs = current_obs
            total_reward += reward
            if done:
                break

        # Only games that did well enough are worth learning from.
        if total_reward >= score_requirement:
            kept_scores.append(total_reward)
            samples.extend(
                [state, [0, 1] if taken == 1 else [1, 0]]
                for state, taken in transitions
            )

        env.reset()

    print(kept_scores)
    return samples
training_data = model_data_preparation()

# Binary classifier: maps an observation to one of the two one-hot encoded
# actions ([1, 0] = action 0, [0, 1] = action 1) found in *training_data*.
if not training_data:
    # Fail with a clear message instead of an obscure shape error in fit().
    raise RuntimeError(
        'No random game reached score_requirement; there is no data to train on.'
    )

# Stack the samples into an (n_samples, n_features) input matrix and an
# (n_samples, 2) target matrix.
train_inputs = np.array(
    [sample[0] for sample in training_data], dtype=np.float32
)
train_targets = np.array(
    [sample[1] for sample in training_data], dtype=np.float32
)

# No input shape is declared: the Sequential model is built from the shape of
# the data on the first call to fit().
trained_model = Sequential([
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    # One softmax unit per action, so np.argmax() over a prediction selects
    # the action the network scores highest.
    Dense(2, activation='softmax'),
])
trained_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=['accuracy'],
)
trained_model.fit(train_inputs, train_targets, epochs=10)
# Play 100 rendered games with the trained model and report how it performed.
scores = []
choices = []
try:
    for each_game in range(100):
        score = 0
        # Stays empty until the first step of the game returns an observation.
        prev_obs = []
        for step_index in range(goal_steps):
            env.render()
            if len(prev_obs) == 0:
                # Nothing observed yet, so the opening move is random.
                action = random.randrange(0, 2)
            else:
                # predict() works on batches: pass the observation as a batch
                # of one row and take the highest-scoring action.
                action = np.argmax(
                    trained_model.predict(prev_obs.reshape(1, -1))[0]
                )

            choices.append(action)
            new_observation, reward, done, info = env.step(action)
            prev_obs = new_observation
            score += reward
            if done:
                break

        env.reset()
        scores.append(score)
finally:
    # Close the environment so whatever render() opened is released even if
    # a step or a prediction fails part-way through.
    env.close()

print(scores)
print('Average Score:', sum(scores)/len(scores))
print('choice 1:{} choice 0:{}'.format(choices.count(1)/len(choices),choices.count(0)/len(choices)))