Skip to content

Commit 6b2baf6

Browse files
authored
Merge pull request #520 from LLNL/abmarl-0-pacman-simplifications
Abmarl 0 pacman simplifications
2 parents 7f820e1 + 68e0fbd commit 6b2baf6

File tree

2 files changed

+79
-83
lines changed

2 files changed

+79
-83
lines changed

abmarl/examples/sim/pacman.py

+70 -83
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99

1010

1111
class PacmanAgent(MovingAgent, OrientationAgent, GridObservingAgent):
12-
def __init__(self, move_range=1, view_range=100, initial_health=1, **kwargs):
12+
def __init__(self, move_range=1, view_range=20, initial_health=1, **kwargs):
1313
super().__init__(
1414
move_range=move_range,
1515
view_range=view_range,
@@ -31,7 +31,7 @@ def __init__(self, render_size=50, initial_health=1, **kwargs):
3131

3232

3333
class BaddieAgent(MovingAgent, OrientationAgent, GridObservingAgent):
34-
def __init__(self, move_range=1, view_range=100, **kwargs):
34+
def __init__(self, move_range=1, view_range=0, **kwargs):
3535
super().__init__(
3636
move_range=move_range,
3737
view_range=view_range,
@@ -84,7 +84,7 @@ def reward_scheme(self, value):
8484
else:
8585
self._reward_scheme = {
8686
'bad_move': -0.1,
87-
'entropy': 0.01,
87+
'entropy': -0.01,
8888
'eat_food': 0.1,
8989
'kill': 1,
9090
'die': -1,
@@ -239,9 +239,9 @@ def step(self, action_dict, **kwargs):
239239
self.rewards['pacman'] += self.reward_scheme['entropy']
240240
if np.array_equal(self.pacman.position, np.array([9, 0])):
241241
self.grid.remove(self.pacman, (9, 0))
242-
self.grid.place(self.pacman, (9, 20))
243-
elif np.array_equal(self.pacman.position, np.array([9, 20])):
244-
self.grid.remove(self.pacman, (9, 20))
242+
self.grid.place(self.pacman, (9, 18))
243+
elif np.array_equal(self.pacman.position, np.array([9, 18])):
244+
self.grid.remove(self.pacman, (9, 18))
245245
self.grid.place(self.pacman, (9, 0))
246246

247247
# Compute overlaps with pacman
@@ -263,18 +263,9 @@ def step(self, action_dict, **kwargs):
263263
'baddie_0': {'move': 0},
264264
'baddie_1': {'move': 0},
265265
'baddie_2': {'move': 0},
266-
'baddie_3': {'move': 1},
267-
'baddie_4': {'move': np.random.randint(0, 5)},
268-
'baddie_5': {'move': 3},
269-
'baddie_6': {'move': 0},
270-
'baddie_7': {'move': 0},
271-
'baddie_8': {'move': 0},
272-
'baddie_9': {'move': 0},
266+
'baddie_3': {'move': 0},
267+
'baddie_4': {'move': 0},
273268
}
274-
if self.step_count == 0:
275-
action_dict['baddie_2']['move'] = 4
276-
action_dict['baddie_6']['move'] = 4
277-
action_dict['baddie_9']['move'] = 3
278269
if self.step_count % 10 == 0:
279270
action_dict['baddie_0']['move'] = 3
280271
action_dict['baddie_1']['move'] = 1
@@ -287,43 +278,39 @@ def step(self, action_dict, **kwargs):
287278
elif self.step_count % 10 == 8:
288279
action_dict['baddie_0']['move'] = 4
289280
action_dict['baddie_1']['move'] = 4
290-
if (self.step_count - 8) % 16 == 0:
291-
if self.agents['baddie_2'].orientation == 4:
292-
action_dict['baddie_2']['move'] = 2
293-
action_dict['baddie_6']['move'] = 2
294-
action_dict['baddie_9']['move'] = 3
295-
else:
296-
action_dict['baddie_2']['move'] = 4
297-
action_dict['baddie_6']['move'] = 4
298-
action_dict['baddie_9']['move'] = 1
299281
if self.step_count % 14 == 0:
300-
action_dict['baddie_7']['move'] = 3
301-
action_dict['baddie_8']['move'] = 1
282+
action_dict['baddie_3']['move'] = 3
283+
action_dict['baddie_4']['move'] = 1
302284
elif self.step_count % 14 == 3:
303-
action_dict['baddie_7']['move'] = 2
304-
action_dict['baddie_8']['move'] = 2
285+
action_dict['baddie_3']['move'] = 2
286+
action_dict['baddie_4']['move'] = 2
305287
elif self.step_count % 14 == 7:
306-
action_dict['baddie_7']['move'] = 1
307-
action_dict['baddie_8']['move'] = 3
288+
action_dict['baddie_3']['move'] = 1
289+
action_dict['baddie_4']['move'] = 3
308290
elif self.step_count % 14 == 9:
309-
action_dict['baddie_7']['move'] = 4
310-
action_dict['baddie_8']['move'] = 4
291+
action_dict['baddie_3']['move'] = 4
292+
action_dict['baddie_4']['move'] = 4
311293
elif self.step_count % 14 == 11:
312-
action_dict['baddie_7']['move'] = 1
313-
action_dict['baddie_8']['move'] = 3
294+
action_dict['baddie_3']['move'] = 1
295+
action_dict['baddie_4']['move'] = 3
314296
elif self.step_count % 14 == 12:
315-
action_dict['baddie_7']['move'] = 4
316-
action_dict['baddie_8']['move'] = 4
297+
action_dict['baddie_3']['move'] = 4
298+
action_dict['baddie_4']['move'] = 4
299+
if self.step_count % 13 == 0:
300+
if self.agents['baddie_2'].orientation == 3:
301+
action_dict['baddie_2']['move'] = 1
302+
else:
303+
action_dict['baddie_2']['move'] = 3
317304

318305
# Now move the baddies and compute overlaps with pacman
319306
for agent_id, action in action_dict.items():
320307
agent = self.agents[agent_id]
321308
move_result = self.move_actor.process_action(agent, action, **kwargs)
322309
if np.array_equal(agent.position, np.array([9, 0])):
323310
self.grid.remove(agent, (9, 0))
324-
self.grid.place(agent, (9, 20))
325-
elif np.array_equal(agent.position, np.array([9, 20])):
326-
self.grid.remove(agent, (9, 20))
311+
self.grid.place(agent, (9, 18))
312+
elif np.array_equal(agent.position, np.array([9, 18])):
313+
self.grid.remove(agent, (9, 18))
327314
self.grid.place(agent, (9, 0))
328315

329316
# Compute overlaps with pacman
@@ -345,46 +332,46 @@ def example_grid(self):
345332
An example grid for playing the pacman game.
346333
"""
347334
return np.array([
348-
['_', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
349-
'W', 'W', 'W', 'W', 'W', 'W', '_'],
350-
['_', 'W', 'B', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
351-
'F', 'F', 'F', 'F', 'B', 'W', '_'],
352-
['_', 'W', 'F', 'W', 'W', 'F', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
353-
'W', 'F', 'W', 'W', 'F', 'W', '_'],
354-
['_', 'W', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
355-
'F', 'F', 'F', 'F', 'F', 'W', '_'],
356-
['_', 'W', 'F', 'W', 'W', 'F', 'W', 'F', 'W', 'W', 'W', 'W', 'W', 'F',
357-
'W', 'F', 'W', 'W', 'F', 'W', '_'],
358-
['_', 'W', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
359-
'W', 'F', 'F', 'F', 'F', 'W', '_'],
360-
['_', 'W', 'W', 'W', 'W', 'F', 'W', 'W', 'W', '_', 'W', '_', 'W', 'W',
361-
'W', 'F', 'W', 'W', 'W', 'W', '_'],
362-
['_', '_', '_', '_', 'W', 'F', 'W', '_', '_', '_', '_', '_', '_', '_',
363-
'W', 'F', 'W', '_', '_', '_', '_'],
364-
['W', 'W', 'W', 'W', 'W', 'F', 'W', '_', 'W', 'W', 'F', 'W', 'W', '_',
365-
'W', 'F', 'W', 'W', 'W', 'W', 'W'],
366-
['_', '_', '_', '_', '_', 'B', '_', '_', 'B', 'F', 'B', 'F', 'B', '_',
367-
'_', 'B', '_', '_', '_', '_', '_'],
368-
['W', 'W', 'W', 'W', 'W', 'F', 'W', '_', 'W', 'W', 'F', 'W', 'W', '_',
369-
'W', 'F', 'W', 'W', 'W', 'W', 'W'],
370-
['_', '_', '_', '_', 'W', 'F', 'W', '_', '_', '_', '_', '_', '_', '_',
371-
'W', 'F', 'W', '_', '_', '_', '_'],
372-
['_', 'W', 'W', 'W', 'W', 'F', 'W', '_', 'W', 'W', 'W', 'W', 'W', '_',
373-
'W', 'F', 'W', 'W', 'W', 'W', '_'],
374-
['_', 'W', 'B', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
375-
'F', 'F', 'F', 'F', 'B', 'W', '_'],
376-
['_', 'W', 'F', 'W', 'W', 'F', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
377-
'W', 'F', 'W', 'W', 'F', 'W', '_'],
378-
['_', 'W', 'F', 'F', 'W', 'F', 'F', 'F', 'F', 'F', 'P', 'F', 'F', 'F',
379-
'F', 'F', 'W', 'F', 'F', 'W', '_'],
380-
['_', 'W', 'W', 'F', 'W', 'F', 'W', 'F', 'W', 'W', 'W', 'W', 'W', 'F',
381-
'W', 'F', 'W', 'F', 'W', 'W', '_'],
382-
['_', 'W', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
383-
'W', 'F', 'F', 'F', 'F', 'W', '_'],
384-
['_', 'W', 'F', 'W', 'W', 'W', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
385-
'W', 'W', 'W', 'W', 'F', 'W', '_'],
386-
['_', 'W', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'B', 'F', 'F', 'F',
387-
'F', 'F', 'F', 'F', 'F', 'W', '_'],
388-
['_', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
389-
'W', 'W', 'W', 'W', 'W', 'W', '_'],
335+
['W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
336+
'W', 'W', 'W', 'W', 'W', 'W'],
337+
['W', 'B', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
338+
'F', 'F', 'F', 'F', 'B', 'W'],
339+
['W', 'F', 'W', 'W', 'F', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
340+
'W', 'F', 'W', 'W', 'F', 'W'],
341+
['W', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
342+
'F', 'F', 'F', 'F', 'F', 'W'],
343+
['W', 'F', 'W', 'W', 'F', 'W', 'F', 'W', 'W', 'W', 'W', 'W', 'F',
344+
'W', 'F', 'W', 'W', 'F', 'W'],
345+
['W', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
346+
'W', 'F', 'F', 'F', 'F', 'W'],
347+
['W', 'W', 'W', 'W', 'F', 'W', 'W', 'W', '_', 'W', '_', 'W', 'W',
348+
'W', 'F', 'W', 'W', 'W', 'W'],
349+
['W', 'F', 'F', 'W', 'F', 'W', '_', '_', '_', '_', '_', '_', '_',
350+
'W', 'F', 'W', 'F', 'F', 'W'],
351+
['W', 'F', 'F', 'W', 'F', 'W', '_', 'W', 'W', 'F', 'W', 'W', '_',
352+
'W', 'F', 'W', 'F', 'F', 'W'],
353+
['_', '_', '_', '_', 'F', '_', '_', 'W', 'F', 'F', 'F', 'W', 'B',
354+
'_', 'F', '_', '_', '_', '_'],
355+
['W', 'F', 'F', 'W', 'F', 'W', '_', 'W', 'W', 'W', 'W', 'W', '_',
356+
'W', 'F', 'W', 'F', 'F', 'W'],
357+
['W', 'F', 'F', 'W', 'F', 'W', '_', '_', '_', '_', '_', '_', '_',
358+
'W', 'F', 'W', 'F', 'F', 'W'],
359+
['W', 'W', 'W', 'W', 'F', 'W', '_', 'W', 'W', 'W', 'W', 'W', '_',
360+
'W', 'F', 'W', 'W', 'W', 'W'],
361+
['W', 'B', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
362+
'F', 'F', 'F', 'F', 'B', 'W'],
363+
['W', 'F', 'W', 'W', 'F', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
364+
'W', 'F', 'W', 'W', 'F', 'W'],
365+
['W', 'F', 'F', 'W', 'F', 'F', 'F', 'F', 'F', 'P', 'F', 'F', 'F',
366+
'F', 'F', 'W', 'F', 'F', 'W'],
367+
['W', 'W', 'F', 'W', 'F', 'W', 'F', 'W', 'W', 'W', 'W', 'W', 'F',
368+
'W', 'F', 'W', 'F', 'W', 'W'],
369+
['W', 'F', 'F', 'F', 'F', 'W', 'F', 'F', 'F', 'W', 'F', 'F', 'F',
370+
'W', 'F', 'F', 'F', 'F', 'W'],
371+
['W', 'F', 'W', 'W', 'W', 'W', 'W', 'W', 'F', 'W', 'F', 'W', 'W',
372+
'W', 'W', 'W', 'W', 'F', 'W'],
373+
['W', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F', 'F',
374+
'F', 'F', 'F', 'F', 'F', 'W'],
375+
['W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W', 'W',
376+
'W', 'W', 'W', 'W', 'W', 'W'],
390377
])

examples/rllib_pacman.py

+9
Original file line number | Diff line number | Diff line change
@@ -32,11 +32,20 @@
3232
render_color='r'
3333
),
3434
}
35+
extra_agents = {
36+
'pacman': PacmanAgent(
37+
id='pacman',
38+
encoding=1,
39+
view_range=2,
40+
render_color='yellow',
41+
)
42+
}
3543
sim = MultiAgentWrapper(
3644
AllStepManager(
3745
PacmanSim.build_sim_from_array(
3846
PacmanSim.example_grid,
3947
object_registry,
48+
# extra_agents=extra_agents,
4049
states={'PositionState', 'OrientationState', 'HealthState'},
4150
observers={'AbsoluteEncodingObserver'},
4251
overlapping={1: {3, 4}, 4: {3, 4}},

0 commit comments

Comments
 (0)