bot.py
import numpy as np


class Botenv:
    def __init__(self, T, dt, ENV_SIZE, V):
        self.ENV_SIZE = ENV_SIZE  # side length of the square arena
        self.V = V                # constant forward speed
        self.dt = dt              # integration timestep
        self.T0 = T               # episode duration; restored on every reset
        self.gamma = 1e-3         # discount factor (unused in this file)
        self.reset()

    def reset(self):
        # Agent position and heading
        self.x = np.random.randint(0, self.ENV_SIZE)
        self.y = np.random.randint(0, self.ENV_SIZE)
        self.theta = np.random.uniform(0, 2 * np.pi)
        self.hit_boundary_x = 0
        self.hit_boundary_y = 0
        # Target position, uniform over the arena
        self.target_pos = np.random.rand(2) * self.ENV_SIZE
"""
# Obstacle Positions
NUM_OBS = 3
self.obs_pos = np.random.rand(2, NUM_OBS)*self.ENV_SIZE
"""
        # Distance from target
        self.target_dist = np.hypot(self.x - self.target_pos[0],
                                    self.y - self.target_pos[1])
        # State: [x, y, heading, distance to target]
        self.state = np.array([self.x, self.y, self.theta, self.target_dist])
        self.T = self.T0  # remaining episode time
        self.done = False
        return self.state
    def action(self, w):
        self.T -= self.dt
        # Turn at angular rate w (scaled by 10) and wrap heading to [0, 2*pi)
        self.theta = (self.theta + w * self.dt * 10) % (2 * np.pi)
        # Reflect off the arena walls: each boundary crossing flips the sign
        # of the corresponding velocity component via (-1)**hit_count
        if self.x > self.ENV_SIZE or self.x < 0:
            self.hit_boundary_x += 1
        if self.y > self.ENV_SIZE or self.y < 0:
            self.hit_boundary_y += 1
        self.vx = ((-1) ** self.hit_boundary_x) * (self.V * np.cos(self.theta))
        self.vy = ((-1) ** self.hit_boundary_y) * (self.V * np.sin(self.theta))
        self.x += self.vx * self.dt
        self.y += self.vy * self.dt
        self.target_dist = np.hypot(self.x - self.target_pos[0],
                                    self.y - self.target_pos[1])
        # Add code for obstacle distance here
        if self.target_dist <= 0.5:
            self.done = True
            steps_elapsed = round((self.T0 - self.T) / self.dt)
            print(f"Agent hit target at timestep: {steps_elapsed}")
        if self.T <= 0:
            self.done = True
        # Update the state and score the post-action state, so the terminal
        # bonus fires on the same step the episode ends
        self.state = np.array([self.x, self.y, self.theta, self.target_dist])
        reward = self.get_reward(self.state)
        return self.state, reward, self.done
    def get_reward(self, state):
        # state[3] is the distance to the target; the success threshold
        # matches the termination check in action()
        if state[3] <= 0.5:
            return 1000
        elif self.T > 0:
            # Dense shaping: zero at the target, increasingly negative with distance
            return 1 - np.exp(0.05 * state[3])
        else:
            return -1000
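
For reference, a minimal rollout sketch follows. The constructor arguments (T=10, dt=0.1, ENV_SIZE=10, V=1.0) and the random-turn policy are illustrative assumptions, not values defined anywhere in this file. Note that the shaping reward 1 - exp(0.05 * d) is 0 at d = 0 and roughly -0.65 at d = 10, so per-step rewards stay small next to the ±1000 terminal values.

# Minimal rollout sketch: illustrative parameter values and a random
# angular-velocity policy, both assumptions rather than project defaults.
import numpy as np

env = Botenv(T=10, dt=0.1, ENV_SIZE=10, V=1.0)
state = env.reset()
episode_return = 0.0
while not env.done:
    w = np.random.uniform(-1, 1)         # random angular-velocity command
    state, reward, done = env.action(w)
    episode_return += reward
print(f"Episode return: {episode_return:.2f}")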