Skip to content

Commit 5084ea2

Browse files
committed
Renamed VoxelEnv to Megaverse
1 parent eaf9e97 commit 5084ea2

File tree

109 files changed

+274
-283
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+274
-283
lines changed

Makefile

-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
PYTHON ?= python
22

3-
export REPO=voxel-rl
43
export BASE_TAG=$(shell ${PYTHON} -c 'import hashlib; sha = hashlib.sha1((open("docker/Dockerfile.base").read() + open("requirements/requirements.txt").read()).encode()); print(sha.hexdigest())')
54
BRANCH = $(shell git rev-parse --abbrev-ref HEAD)
65
VERSION = $(shell git rev-parse --short HEAD)

README.md

+7-7
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
# voxel-rl
1+
# Megaverse
22

33
```
44
1) Clone the repo
5-
git clone https://github.com/alex-petrenko/voxel-rl.git
5+
git clone https://github.com/alex-petrenko/megaverse.git
66
77
2) Init submodules
88
git submodule update --init --recursive
@@ -60,7 +60,7 @@ hash -r
6060
sudo snap install cmake --classic
6161
6262
9) Build the repo
63-
cd voxel-rl
63+
cd megaverse
6464
mkdir build
6565
cd build
6666
@@ -78,9 +78,9 @@ make -j10
7878
7979
10) Run benchmark
8080
cd Release/bin
81-
./voxel_env_app
81+
./megaverse_test_app
8282
83-
(see global boolean flags in voxel_env_app.cpp, they control the scenario and rendering settings
83+
(see global boolean flags in megaverse_test_app.cpp, they control the scenario and rendering settings
8484
TODO: make configurable)
8585
8686
11) Run viewer
@@ -103,7 +103,7 @@ pip install -e .
103103
13) Run tests
104104
python -m unittest
105105
106-
14) You are ready to use the VoxelWorld Python API!
106+
14) You are ready to use the Megaverse Python API!
107107
108108
```
109109

@@ -117,7 +117,7 @@ python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps
117117
118118
Example runner script:
119119
120-
python -m sample_factory.runner.run --run=megaverse_rl.runs.voxel_single_agent --runner=processes --max_parallel=8 --pause_between=10 --experiments_per_gpu=2 --num_gpus=4
120+
python -m sample_factory.runner.run --run=megaverse_rl.runs.megaverse_single_agent --runner=processes --max_parallel=8 --pause_between=10 --experiments_per_gpu=2 --num_gpus=4
121121
122122
123123
```
File renamed without changes.
File renamed without changes.

voxel_env/voxel_env_gym.py renamed to megaverse/megaverse_env.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
from gym.spaces import Discrete
66

77
# noinspection PyUnresolvedReferences
8-
from voxel_env.extension.voxel_env import VoxelEnvGym, set_voxel_env_log_level
8+
from megaverse.extension.megaverse import MegaverseGym, set_megaverse_log_level
99

1010

11-
VOXELWORLD8 = [
11+
MEGAVERSE8 = [
1212
'TowerBuilding',
1313
'ObstaclesEasy',
1414
'ObstaclesHard',
@@ -26,8 +26,8 @@
2626

2727
def make_env_multitask(multitask_name, task_idx, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
2828
assert 'multitask' in multitask_name
29-
if multitask_name.endswith('voxelworld8'):
30-
tasks = VOXELWORLD8
29+
if multitask_name.endswith('megaverse8'):
30+
tasks = MEGAVERSE8
3131
elif multitask_name.endswith('obstacles'):
3232
tasks = OBSTACLES_MULTITASK
3333
else:
@@ -36,17 +36,17 @@ def make_env_multitask(multitask_name, task_idx, num_envs, num_agents_per_env, n
3636
scenario_idx = task_idx % len(tasks)
3737
scenario = tasks[scenario_idx]
3838
print('Multi-task, scenario', scenario_idx, scenario)
39-
return VoxelEnv(scenario, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
39+
return MegaverseEnv(scenario, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
4040

4141

42-
class VoxelEnv(gym.Env):
42+
class MegaverseEnv(gym.Env):
4343
def __init__(self, scenario_name, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
4444
scenario_name = scenario_name.casefold()
4545
self.scenario_name = scenario_name
4646

4747
self.is_multiagent = True
4848

49-
set_voxel_env_log_level(2)
49+
set_megaverse_log_level(2)
5050

5151
self.img_w = 128
5252
self.img_h = 72
@@ -69,7 +69,7 @@ def __init__(self, scenario_name, num_envs, num_agents_per_env, num_simulation_t
6969

7070
# float_params['episodeLengthSec'] = 1.0
7171

72-
self.env = VoxelEnvGym(
72+
self.env = MegaverseGym(
7373
self.scenario_name,
7474
self.img_w, self.img_h, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, float_params,
7575
)
File renamed without changes.

voxel_env/tests/test_env.py renamed to megaverse/tests/test_env.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from unittest import TestCase
88

9-
from voxel_env.voxel_env_gym import VoxelEnv, make_env_multitask
9+
from megaverse.megaverse_env import MegaverseEnv, make_env_multitask
1010

1111

1212
def sample_actions(e):
@@ -15,7 +15,7 @@ def sample_actions(e):
1515

1616
def make_test_env(num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
1717
"""Making env with a default scenario name."""
18-
return VoxelEnv('ObstaclesEasy', num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
18+
return MegaverseEnv('ObstaclesEasy', num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
1919

2020

2121
class TestEnv(TestCase):
@@ -121,7 +121,7 @@ def test_performance(self):
121121
# print(fps1, fps2, fps4)
122122

123123
def test_reward_shaping(self):
124-
e = VoxelEnv('TowerBuilding', num_envs=3, num_agents_per_env=2, num_simulation_threads=2, use_vulkan=True)
124+
e = MegaverseEnv('TowerBuilding', num_envs=3, num_agents_per_env=2, num_simulation_threads=2, use_vulkan=True)
125125
default_reward_shaping = e.get_default_reward_shaping()
126126
self.assertEqual(default_reward_shaping, e.get_current_reward_shaping(0))
127127
self.assertEqual(default_reward_shaping, e.get_current_reward_shaping(1))
@@ -147,12 +147,12 @@ def mem_usage_kb():
147147

148148
# params = {'episodeLengthSec': 0.1}
149149
params = {}
150-
e = VoxelEnv('Rearrange', num_envs=32, num_agents_per_env=1, num_simulation_threads=1, use_vulkan=True, params=params)
150+
e = MegaverseEnv('Rearrange', num_envs=32, num_agents_per_env=1, num_simulation_threads=1, use_vulkan=True, params=params)
151151
e.reset()
152152

153153
orig_mem_usage = mem_usage_kb()
154154

155-
for i in range(10000):
155+
for i in range(1000):
156156
print('Mem difference: ', mem_usage_kb() - orig_mem_usage, 'kb')
157157
e.step(sample_actions(e))
158158

@@ -165,7 +165,7 @@ def test_multitask(self):
165165
num_processes = 2
166166

167167
def run_single_task(i):
168-
e = make_env_multitask('voxelworld8', i, 1, 1, 1, use_vulkan=True, params={})
168+
e = make_env_multitask('megaverse8', i, 1, 1, 1, use_vulkan=True, params={})
169169
e.reset()
170170
e.render() # TODO: if this call is omitted we have rendering bugs. Fixme!
171171

@@ -186,11 +186,11 @@ def run_single_task(i):
186186

187187
def test_viewer(self):
188188
params = {'episodeLengthSec': 1.0}
189-
e1 = VoxelEnv('ObstaclesHard', 2, 2, 2, True, params)
189+
e1 = MegaverseEnv('ObstaclesHard', 2, 2, 2, True, params)
190190
e1.reset()
191191
e1.render()
192192

193-
for i in range(10000):
193+
for i in range(500):
194194
e1.step(sample_actions(e1))
195195
e1.render()
196196
time.sleep(0.01)

megaverse_rl/enjoy.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from sample_factory.algorithms.appo.enjoy_appo import enjoy
44
from sample_factory.algorithms.utils.arguments import parse_args
55

6-
from megaverse_rl.voxel_env_utils import register_env
6+
from megaverse_rl.megaverse_utils import register_env
77

88

99
def main():

megaverse_rl/voxel_env_utils.py renamed to megaverse_rl/megaverse_utils.py

+26-26
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import gym
22
from sample_factory.envs.env_registry import global_env_registry
33

4-
from voxel_env.voxel_env_gym import VoxelEnv, make_env_multitask
4+
from megaverse.megaverse_env import MegaverseEnv, make_env_multitask
55

66
from sample_factory.envs.env_utils import RewardShapingInterface, TrainingInfoInterface
77
from sample_factory.utils.utils import str2bool, log
@@ -64,8 +64,8 @@ def step(self, action):
6464
return obs, rewards, dones, infos
6565

6666

67-
def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
68-
scenario_name = env_name.split('voxel_env_')[-1].casefold()
67+
def make_megaverse(env_name, cfg=None, env_config=None, **kwargs):
68+
scenario_name = env_name.split('megaverse_')[-1].casefold()
6969
log.debug('Using scenario %s', scenario_name)
7070

7171
if 'multitask' in scenario_name:
@@ -78,26 +78,26 @@ def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
7878
env = make_env_multitask(
7979
scenario_name,
8080
task_idx,
81-
num_envs=cfg.voxel_num_envs_per_instance,
82-
num_agents_per_env=cfg.voxel_num_agents_per_env,
83-
num_simulation_threads=cfg.voxel_num_simulation_threads,
84-
use_vulkan=cfg.voxel_use_vulkan,
81+
num_envs=cfg.megaverse_num_envs_per_instance,
82+
num_agents_per_env=cfg.megaverse_num_agents_per_env,
83+
num_simulation_threads=cfg.megaverse_num_simulation_threads,
84+
use_vulkan=cfg.megaverse_use_vulkan,
8585
)
8686
else:
87-
env = VoxelEnv(
87+
env = MegaverseEnv(
8888
scenario_name=scenario_name,
89-
num_envs=cfg.voxel_num_envs_per_instance,
90-
num_agents_per_env=cfg.voxel_num_agents_per_env,
91-
num_simulation_threads=cfg.voxel_num_simulation_threads,
92-
use_vulkan=cfg.voxel_use_vulkan,
89+
num_envs=cfg.megaverse_num_envs_per_instance,
90+
num_agents_per_env=cfg.megaverse_num_agents_per_env,
91+
num_simulation_threads=cfg.megaverse_num_simulation_threads,
92+
use_vulkan=cfg.megaverse_use_vulkan,
9393
)
9494

95-
env = Wrapper(env, cfg.voxel_increase_team_spirit, cfg.voxel_max_team_spirit_steps)
95+
env = Wrapper(env, cfg.megaverse_increase_team_spirit, cfg.megaverse_max_team_spirit_steps)
9696
return env
9797

9898

99-
def voxel_env_override_defaults(env, parser):
100-
"""RL params specific to VoxelEnv envs."""
99+
def megaverse_override_defaults(env, parser):
100+
"""RL params specific to Megaverse envs."""
101101
parser.set_defaults(
102102
encoder_type='conv',
103103
encoder_subtype='convnet_simple',
@@ -110,22 +110,22 @@ def voxel_env_override_defaults(env, parser):
110110
)
111111

112112

113-
def add_voxel_env_args(env, parser):
113+
def add_megaverse_args(env, parser):
114114
p = parser
115-
p.add_argument('--voxel_num_envs_per_instance', default=1, type=int, help='Num simulated envs per instance of VoxelEnv')
116-
p.add_argument('--voxel_num_agents_per_env', default=4, type=int, help='Number of agents in a single env withing a VoxelEnv instance. Total number of agents in one VoxelEnv = num_envs_per_instance * num_agents_per_env')
117-
p.add_argument('--voxel_num_simulation_threads', default=1, type=int, help='Number of CPU threads to use per instance of VoxelEnv')
118-
p.add_argument('--voxel_use_vulkan', default=True, type=str2bool, help='Whether to use Vulkan renderer')
115+
p.add_argument('--megaverse_num_envs_per_instance', default=1, type=int, help='Num simulated envs per instance of Megaverse')
116+
p.add_argument('--megaverse_num_agents_per_env', default=4, type=int, help='Number of agents in a single env within a Megaverse instance. Total number of agents in one Megaverse = num_envs_per_instance * num_agents_per_env')
117+
p.add_argument('--megaverse_num_simulation_threads', default=1, type=int, help='Number of CPU threads to use per instance of Megaverse')
118+
p.add_argument('--megaverse_use_vulkan', default=True, type=str2bool, help='Whether to use Vulkan renderer')
119119

120120
# Team Spirit options
121-
p.add_argument('--voxel_increase_team_spirit', default=False, type=str2bool, help='Increase team spirit from 0 to 1 over max_team_spirit_steps during training. At 1, the reward will be completely selfless.')
122-
p.add_argument('--voxel_max_team_spirit_steps', default=1e9, type=float, help='Number of training steps when team spirit will hit 1.')
121+
p.add_argument('--megaverse_increase_team_spirit', default=False, type=str2bool, help='Increase team spirit from 0 to 1 over max_team_spirit_steps during training. At 1, the reward will be completely selfless.')
122+
p.add_argument('--megaverse_max_team_spirit_steps', default=1e9, type=float, help='Number of training steps when team spirit will hit 1.')
123123

124124

125125
def register_env():
126126
global_env_registry().register_env(
127-
env_name_prefix='voxel_env_',
128-
make_env_func=make_voxel_env,
129-
add_extra_params_func=add_voxel_env_args,
130-
override_default_params_func=voxel_env_override_defaults,
127+
env_name_prefix='megaverse_',
128+
make_env_func=make_megaverse,
129+
add_extra_params_func=add_megaverse_args,
130+
override_default_params_func=megaverse_override_defaults,
131131
)
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
from sample_factory.runner.run_description import Experiment, ParamGrid
22

33
_params = ParamGrid([
4-
('env', ['voxel_env_TowerBuilding', 'voxel_env_ObstaclesEasy', 'voxel_env_ObstaclesHard', 'voxel_env_Collect', 'voxel_env_Sokoban', 'voxel_env_HexMemory', 'voxel_env_HexExplore', 'voxel_env_Rearrange']),
4+
('env', ['megaverse_TowerBuilding', 'megaverse_ObstaclesEasy', 'megaverse_ObstaclesHard', 'megaverse_Collect', 'megaverse_Sokoban', 'megaverse_HexMemory', 'megaverse_HexExplore', 'megaverse_Rearrange']),
55
('use_cpc', ['True']),
66
('seed', [11111, 22222, 33333]),
77
])
88

9-
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30'
9+
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30'
1010

1111
EXPERIMENT_1AGENT = Experiment(
12-
'voxel_env_1ag',
13-
_cli + ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
12+
'megaverse_1ag',
13+
_cli + ' --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1',
1414
_params.generate_params(randomize=False),
1515
)
1616

1717
EXPERIMENT_2AGENTS = Experiment(
18-
'voxel_env_2ag',
19-
_cli + ' --voxel_num_envs_per_instance=18 --voxel_num_agents_per_env=2',
18+
'megaverse_2ag',
19+
_cli + ' --megaverse_num_envs_per_instance=18 --megaverse_num_agents_per_env=2',
2020
_params.generate_params(randomize=False),
2121
)
2222

2323
EXPERIMENT_4AGENTS = Experiment(
24-
'voxel_env_4ag',
25-
_cli + ' --voxel_num_envs_per_instance=9 --voxel_num_agents_per_env=4',
24+
'megaverse_4ag',
25+
_cli + ' --megaverse_num_envs_per_instance=9 --megaverse_num_agents_per_env=4',
2626
_params.generate_params(randomize=False),
2727
)

megaverse_rl/runs/voxel_envs.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid
22

33
_params = ParamGrid([
4-
('env', ['voxel_env_rearrange', 'voxel_env_collect', 'voxel_env_obstaclesEasy', 'voxel_env_hexMemory']),
5-
('voxel_num_simulation_threads', [1]),
4+
('env', ['megaverse_rearrange', 'megaverse_collect', 'megaverse_obstaclesEasy', 'megaverse_hexMemory']),
5+
('megaverse_num_simulation_threads', [1]),
66
('rnn_num_layers', [2]),
77
])
88

99
_experiment = Experiment(
10-
'voxel_env_pbt',
11-
'python -m megaverse_rl.train --train_for_seconds=360000000 --algo=APPO --gamma=0.997 --use_rnn=True --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4',
10+
'megaverse_pbt',
11+
'python -m megaverse_rl.train --train_for_seconds=360000000 --algo=APPO --gamma=0.997 --use_rnn=True --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4',
1212
_params.generate_params(randomize=False),
1313
)
1414

15-
RUN_DESCRIPTION = RunDescription('voxel_env_v114_env_v52', experiments=[_experiment])
15+
RUN_DESCRIPTION = RunDescription('megaverse_v114_env_v52', experiments=[_experiment])
+3-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from sample_factory.runner.run_description import RunDescription
2-
from sample_factory.runner.runs.voxel_base_experiments import EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS
32

4-
RUN_DESCRIPTION = RunDescription('voxel_env_v115_multi_agent_v55', experiments=[EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS])
3+
from megaverse_rl.runs.megaverse_base_experiments import EXPERIMENT_4AGENTS, EXPERIMENT_2AGENTS
4+
5+
RUN_DESCRIPTION = RunDescription('megaverse_v115_multi_agent_v55', experiments=[EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS])

megaverse_rl/runs/voxel_multitask.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid
22

33
_params = ParamGrid([
4-
('env', ['voxel_env_multitask_voxelworld8']),
4+
('env', ['megaverse_multitask_megaverse8']),
55
('use_cpc', ['True']),
66
('seed', [11111, 22222, 33333, 44444, 55555]),
77
])
88

9-
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30 --pbt_mix_policies_in_one_env=False'
9+
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30 --pbt_mix_policies_in_one_env=False'
1010

1111
EXPERIMENT_1AGENT = Experiment(
12-
'voxel_env_multitask_obs',
13-
_cli + ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
12+
'megaverse_multitask_obs',
13+
_cli + ' --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1',
1414
_params.generate_params(randomize=False),
1515
)
1616

17-
RUN_DESCRIPTION = RunDescription('voxel_env_v115_multitask8_v55', experiments=[EXPERIMENT_1AGENT])
17+
RUN_DESCRIPTION = RunDescription('megaverse_v115_multitask8_v55', experiments=[EXPERIMENT_1AGENT])

0 commit comments

Comments (0)