forked from Coac/CommNet-BiCnet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
guessing_sum_env.py
48 lines (34 loc) · 1.26 KB
/
guessing_sum_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
class GuessingSumEnv:
    """Single-step multi-agent environment where each agent guesses a sum.

    ``reset`` draws one scalar observation per agent and stores the sum of
    all observations. ``step`` then scores every agent's guess by its
    absolute distance to that stored sum. Each episode ends after one step.
    """

    def __init__(self, num_agents=5):
        """Set up the environment.

        Args:
            num_agents: Number of agents; fixes the ``(num_agents, 1)``
                shape of both observations and actions.
        """
        self.num_agents = num_agents
        # Target value the agents try to guess; overwritten by reset().
        self.sum = 0
        # Bound applied to every individual observation in reset().
        self.scale = 10.0
        # Upper bound on |sum| (num_agents observations, each within
        # [-scale, scale]); used to clip and normalise the rewards.
        self.sum_scale = self.num_agents * self.scale

    def step(self, actions):
        """Score the agents' guesses against the stored sum.

        Args:
            actions: Array-like of shape ``(num_agents, 1)`` holding one
                guess per agent.

        Returns:
            A tuple ``(observations, normalized_rewards, done, info)`` where
            ``observations`` and ``info`` are ``None``, ``done`` is always
            ``True`` and ``normalized_rewards`` has the same shape as
            ``actions`` with values in ``[0, 1]`` (1 means an exact guess).

        Raises:
            ValueError: If ``actions`` does not have shape
                ``(num_agents, 1)``.
        """
        # Accept nested lists/tuples as well as arrays, so the shape check
        # below does not fail with an unrelated AttributeError.
        actions = np.asanyarray(actions)
        expected_shape = (self.num_agents, 1)
        if actions.shape != expected_shape:
            raise ValueError(
                'got input shape {} instead of {}'.format(
                    actions.shape, expected_shape
                )
            )

        rewards = -np.abs(actions - self.sum)  # [-Inf ; 0]
        # Errors larger than sum_scale are clipped so the shifted and
        # rescaled reward stays inside the unit interval.
        clipped = np.maximum(rewards, -self.sum_scale)
        normalized_rewards = (clipped + self.sum_scale) / self.sum_scale  # [0 ; 1]

        observations = None
        done = True
        info = None
        return observations, normalized_rewards, done, info

    def reset(self):
        """Draw new observations and store their sum as the target.

        Returns:
            Array of shape ``(num_agents, 1)`` sampled from a standard
            normal distribution and clipped to ``[-scale, scale]``.
        """
        observations = np.clip(
            np.random.normal(size=(self.num_agents, 1)),
            -self.scale,
            self.scale,
        )
        self.sum = np.sum(observations)
        return observations

    def render(self, mode='human'):
        """Do nothing; the environment has no visual representation."""
        return

    def close(self):
        """Do nothing; there are no resources to release."""
        return

    def seed(self, seed=None):
        """Seed NumPy's global random generator, which reset() samples from.

        Note that this affects every user of ``np.random`` in the process,
        not only this environment.
        """
        np.random.seed(seed)
        return
if __name__ == '__main__':
    # Smoke-test demo: run one seeded episode with random guesses and
    # print what the environment hands back.
    demo_env = GuessingSumEnv()
    demo_env.seed(0)

    initial_obs = demo_env.reset()
    print('obs:', initial_obs)

    # Sampled after reset() so the global RNG is consumed in the same order.
    random_guesses = np.random.normal(size=(demo_env.num_agents, 1))
    print('actions:', random_guesses)

    # Prints the full (observations, rewards, done, info) tuple from step().
    step_result = demo_env.step(random_guesses)
    print('rewards:', step_result)