Skip to content

Commit

Permalink
Renamed VoxelEnv to Megaverse
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-petrenko committed Jun 10, 2021
1 parent eaf9e97 commit 5084ea2
Show file tree
Hide file tree
Showing 109 changed files with 274 additions and 283 deletions.
1 change: 0 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
PYTHON ?= python

export REPO=voxel-rl
export BASE_TAG=$(shell ${PYTHON} -c 'import hashlib; sha = hashlib.sha1((open("docker/Dockerfile.base").read() + open("requirements/requirements.txt").read()).encode()); print(sha.hexdigest())')
BRANCH = $(shell git rev-parse --abbrev-ref HEAD)
VERSION = $(shell git rev-parse --short HEAD)
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# voxel-rl
# Megaverse

```
1) Clone the repo
git clone https://github.com/alex-petrenko/voxel-rl.git
git clone https://github.com/alex-petrenko/megaverse.git
2) Init submodules
git submodule update --init --recursive
Expand Down Expand Up @@ -60,7 +60,7 @@ hash -r
sudo snap install cmake --classic
9) Build the repo
cd voxel-rl
cd megaverse
mkdir build
cd build
Expand All @@ -78,9 +78,9 @@ make -j10
10) Run benchmark
cd Release/bin
./voxel_env_app
./megaverse_test_app
(see global boolean flags in voxel_env_app.cpp, they control the scenario and rendering settings
(see the global boolean flags in megaverse_test_app.cpp; they control the scenario and rendering settings
TODO: make configurable)
11) Run viewer
Expand All @@ -103,7 +103,7 @@ pip install -e .
13) Run tests
python -m unittest
14) You are ready to use the VoxelWorld Python API!
14) You are ready to use the Megaverse Python API!
```

Expand All @@ -117,7 +117,7 @@ python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps
Example runner script:
python -m sample_factory.runner.run --run=megaverse_rl.runs.voxel_single_agent --runner=processes --max_parallel=8 --pause_between=10 --experiments_per_gpu=2 --num_gpus=4
python -m sample_factory.runner.run --run=megaverse_rl.runs.megaverse_single_agent --runner=processes --max_parallel=8 --pause_between=10 --experiments_per_gpu=2 --num_gpus=4
```
Expand Down
File renamed without changes.
File renamed without changes.
16 changes: 8 additions & 8 deletions voxel_env/voxel_env_gym.py → megaverse/megaverse_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from gym.spaces import Discrete

# noinspection PyUnresolvedReferences
from voxel_env.extension.voxel_env import VoxelEnvGym, set_voxel_env_log_level
from megaverse.extension.megaverse import MegaverseGym, set_megaverse_log_level


VOXELWORLD8 = [
MEGAVERSE8 = [
'TowerBuilding',
'ObstaclesEasy',
'ObstaclesHard',
Expand All @@ -26,8 +26,8 @@

def make_env_multitask(multitask_name, task_idx, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
assert 'multitask' in multitask_name
if multitask_name.endswith('voxelworld8'):
tasks = VOXELWORLD8
if multitask_name.endswith('megaverse8'):
tasks = MEGAVERSE8
elif multitask_name.endswith('obstacles'):
tasks = OBSTACLES_MULTITASK
else:
Expand All @@ -36,17 +36,17 @@ def make_env_multitask(multitask_name, task_idx, num_envs, num_agents_per_env, n
scenario_idx = task_idx % len(tasks)
scenario = tasks[scenario_idx]
print('Multi-task, scenario', scenario_idx, scenario)
return VoxelEnv(scenario, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
return MegaverseEnv(scenario, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)


class VoxelEnv(gym.Env):
class MegaverseEnv(gym.Env):
def __init__(self, scenario_name, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
scenario_name = scenario_name.casefold()
self.scenario_name = scenario_name

self.is_multiagent = True

set_voxel_env_log_level(2)
set_megaverse_log_level(2)

self.img_w = 128
self.img_h = 72
Expand All @@ -69,7 +69,7 @@ def __init__(self, scenario_name, num_envs, num_agents_per_env, num_simulation_t

# float_params['episodeLengthSec'] = 1.0

self.env = VoxelEnvGym(
self.env = MegaverseGym(
self.scenario_name,
self.img_w, self.img_h, num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, float_params,
)
Expand Down
File renamed without changes.
16 changes: 8 additions & 8 deletions voxel_env/tests/test_env.py → megaverse/tests/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from unittest import TestCase

from voxel_env.voxel_env_gym import VoxelEnv, make_env_multitask
from megaverse.megaverse_env import MegaverseEnv, make_env_multitask


def sample_actions(e):
Expand All @@ -15,7 +15,7 @@ def sample_actions(e):

def make_test_env(num_envs, num_agents_per_env, num_simulation_threads, use_vulkan=False, params=None):
"""Making env with a default scenario name."""
return VoxelEnv('ObstaclesEasy', num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)
return MegaverseEnv('ObstaclesEasy', num_envs, num_agents_per_env, num_simulation_threads, use_vulkan, params)


class TestEnv(TestCase):
Expand Down Expand Up @@ -121,7 +121,7 @@ def test_performance(self):
# print(fps1, fps2, fps4)

def test_reward_shaping(self):
e = VoxelEnv('TowerBuilding', num_envs=3, num_agents_per_env=2, num_simulation_threads=2, use_vulkan=True)
e = MegaverseEnv('TowerBuilding', num_envs=3, num_agents_per_env=2, num_simulation_threads=2, use_vulkan=True)
default_reward_shaping = e.get_default_reward_shaping()
self.assertEqual(default_reward_shaping, e.get_current_reward_shaping(0))
self.assertEqual(default_reward_shaping, e.get_current_reward_shaping(1))
Expand All @@ -147,12 +147,12 @@ def mem_usage_kb():

# params = {'episodeLengthSec': 0.1}
params = {}
e = VoxelEnv('Rearrange', num_envs=32, num_agents_per_env=1, num_simulation_threads=1, use_vulkan=True, params=params)
e = MegaverseEnv('Rearrange', num_envs=32, num_agents_per_env=1, num_simulation_threads=1, use_vulkan=True, params=params)
e.reset()

orig_mem_usage = mem_usage_kb()

for i in range(10000):
for i in range(1000):
print('Mem difference: ', mem_usage_kb() - orig_mem_usage, 'kb')
e.step(sample_actions(e))

Expand All @@ -165,7 +165,7 @@ def test_multitask(self):
num_processes = 2

def run_single_task(i):
e = make_env_multitask('voxelworld8', i, 1, 1, 1, use_vulkan=True, params={})
e = make_env_multitask('megaverse8', i, 1, 1, 1, use_vulkan=True, params={})
e.reset()
e.render() # TODO: if this call is omitted we have rendering bugs. Fixme!

Expand All @@ -186,11 +186,11 @@ def run_single_task(i):

def test_viewer(self):
params = {'episodeLengthSec': 1.0}
e1 = VoxelEnv('ObstaclesHard', 2, 2, 2, True, params)
e1 = MegaverseEnv('ObstaclesHard', 2, 2, 2, True, params)
e1.reset()
e1.render()

for i in range(10000):
for i in range(500):
e1.step(sample_actions(e1))
e1.render()
time.sleep(0.01)
Expand Down
2 changes: 1 addition & 1 deletion megaverse_rl/enjoy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from sample_factory.algorithms.appo.enjoy_appo import enjoy
from sample_factory.algorithms.utils.arguments import parse_args

from megaverse_rl.voxel_env_utils import register_env
from megaverse_rl.megaverse_utils import register_env


def main():
Expand Down
52 changes: 26 additions & 26 deletions megaverse_rl/voxel_env_utils.py → megaverse_rl/megaverse_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import gym
from sample_factory.envs.env_registry import global_env_registry

from voxel_env.voxel_env_gym import VoxelEnv, make_env_multitask
from megaverse.megaverse_env import MegaverseEnv, make_env_multitask

from sample_factory.envs.env_utils import RewardShapingInterface, TrainingInfoInterface
from sample_factory.utils.utils import str2bool, log
Expand Down Expand Up @@ -64,8 +64,8 @@ def step(self, action):
return obs, rewards, dones, infos


def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
scenario_name = env_name.split('voxel_env_')[-1].casefold()
def make_megaverse(env_name, cfg=None, env_config=None, **kwargs):
scenario_name = env_name.split('megaverse_')[-1].casefold()
log.debug('Using scenario %s', scenario_name)

if 'multitask' in scenario_name:
Expand All @@ -78,26 +78,26 @@ def make_voxel_env(env_name, cfg=None, env_config=None, **kwargs):
env = make_env_multitask(
scenario_name,
task_idx,
num_envs=cfg.voxel_num_envs_per_instance,
num_agents_per_env=cfg.voxel_num_agents_per_env,
num_simulation_threads=cfg.voxel_num_simulation_threads,
use_vulkan=cfg.voxel_use_vulkan,
num_envs=cfg.megaverse_num_envs_per_instance,
num_agents_per_env=cfg.megaverse_num_agents_per_env,
num_simulation_threads=cfg.megaverse_num_simulation_threads,
use_vulkan=cfg.megaverse_use_vulkan,
)
else:
env = VoxelEnv(
env = MegaverseEnv(
scenario_name=scenario_name,
num_envs=cfg.voxel_num_envs_per_instance,
num_agents_per_env=cfg.voxel_num_agents_per_env,
num_simulation_threads=cfg.voxel_num_simulation_threads,
use_vulkan=cfg.voxel_use_vulkan,
num_envs=cfg.megaverse_num_envs_per_instance,
num_agents_per_env=cfg.megaverse_num_agents_per_env,
num_simulation_threads=cfg.megaverse_num_simulation_threads,
use_vulkan=cfg.megaverse_use_vulkan,
)

env = Wrapper(env, cfg.voxel_increase_team_spirit, cfg.voxel_max_team_spirit_steps)
env = Wrapper(env, cfg.megaverse_increase_team_spirit, cfg.megaverse_max_team_spirit_steps)
return env


def voxel_env_override_defaults(env, parser):
"""RL params specific to VoxelEnv envs."""
def megaverse_override_defaults(env, parser):
"""RL params specific to Megaverse envs."""
parser.set_defaults(
encoder_type='conv',
encoder_subtype='convnet_simple',
Expand All @@ -110,22 +110,22 @@ def voxel_env_override_defaults(env, parser):
)


def add_voxel_env_args(env, parser):
def add_megaverse_args(env, parser):
p = parser
p.add_argument('--voxel_num_envs_per_instance', default=1, type=int, help='Num simulated envs per instance of VoxelEnv')
p.add_argument('--voxel_num_agents_per_env', default=4, type=int, help='Number of agents in a single env withing a VoxelEnv instance. Total number of agents in one VoxelEnv = num_envs_per_instance * num_agents_per_env')
p.add_argument('--voxel_num_simulation_threads', default=1, type=int, help='Number of CPU threads to use per instance of VoxelEnv')
p.add_argument('--voxel_use_vulkan', default=True, type=str2bool, help='Whether to use Vulkan renderer')
p.add_argument('--megaverse_num_envs_per_instance', default=1, type=int, help='Num simulated envs per instance of Megaverse')
p.add_argument('--megaverse_num_agents_per_env', default=4, type=int, help='Number of agents in a single env withing a Megaverse instance. Total number of agents in one Megaverse = num_envs_per_instance * num_agents_per_env')
p.add_argument('--megaverse_num_simulation_threads', default=1, type=int, help='Number of CPU threads to use per instance of Megaverse')
p.add_argument('--megaverse_use_vulkan', default=True, type=str2bool, help='Whether to use Vulkan renderer')

# Team Spirit options
p.add_argument('--voxel_increase_team_spirit', default=False, type=str2bool, help='Increase team spirit from 0 to 1 over max_team_spirit_steps during training. At 1, the reward will be completely selfless.')
p.add_argument('--voxel_max_team_spirit_steps', default=1e9, type=float, help='Number of training steps when team spirit will hit 1.')
p.add_argument('--megaverse_increase_team_spirit', default=False, type=str2bool, help='Increase team spirit from 0 to 1 over max_team_spirit_steps during training. At 1, the reward will be completely selfless.')
p.add_argument('--megaverse_max_team_spirit_steps', default=1e9, type=float, help='Number of training steps when team spirit will hit 1.')


def register_env():
global_env_registry().register_env(
env_name_prefix='voxel_env_',
make_env_func=make_voxel_env,
add_extra_params_func=add_voxel_env_args,
override_default_params_func=voxel_env_override_defaults,
env_name_prefix='megaverse_',
make_env_func=make_megaverse,
add_extra_params_func=add_megaverse_args,
override_default_params_func=megaverse_override_defaults,
)
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
from sample_factory.runner.run_description import Experiment, ParamGrid

_params = ParamGrid([
('env', ['voxel_env_TowerBuilding', 'voxel_env_ObstaclesEasy', 'voxel_env_ObstaclesHard', 'voxel_env_Collect', 'voxel_env_Sokoban', 'voxel_env_HexMemory', 'voxel_env_HexExplore', 'voxel_env_Rearrange']),
('env', ['megaverse_TowerBuilding', 'megaverse_ObstaclesEasy', 'megaverse_ObstaclesHard', 'megaverse_Collect', 'megaverse_Sokoban', 'megaverse_HexMemory', 'megaverse_HexExplore', 'megaverse_Rearrange']),
('use_cpc', ['True']),
('seed', [11111, 22222, 33333]),
])

_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30'
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30'

EXPERIMENT_1AGENT = Experiment(
'voxel_env_1ag',
_cli + ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
'megaverse_1ag',
_cli + ' --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1',
_params.generate_params(randomize=False),
)

EXPERIMENT_2AGENTS = Experiment(
'voxel_env_2ag',
_cli + ' --voxel_num_envs_per_instance=18 --voxel_num_agents_per_env=2',
'megaverse_2ag',
_cli + ' --megaverse_num_envs_per_instance=18 --megaverse_num_agents_per_env=2',
_params.generate_params(randomize=False),
)

EXPERIMENT_4AGENTS = Experiment(
'voxel_env_4ag',
_cli + ' --voxel_num_envs_per_instance=9 --voxel_num_agents_per_env=4',
'megaverse_4ag',
_cli + ' --megaverse_num_envs_per_instance=9 --megaverse_num_agents_per_env=4',
_params.generate_params(randomize=False),
)
10 changes: 5 additions & 5 deletions megaverse_rl/runs/voxel_envs.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_params = ParamGrid([
('env', ['voxel_env_rearrange', 'voxel_env_collect', 'voxel_env_obstaclesEasy', 'voxel_env_hexMemory']),
('voxel_num_simulation_threads', [1]),
('env', ['megaverse_rearrange', 'megaverse_collect', 'megaverse_obstaclesEasy', 'megaverse_hexMemory']),
('megaverse_num_simulation_threads', [1]),
('rnn_num_layers', [2]),
])

_experiment = Experiment(
'voxel_env_pbt',
'python -m megaverse_rl.train --train_for_seconds=360000000 --algo=APPO --gamma=0.997 --use_rnn=True --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4',
'megaverse_pbt',
'python -m megaverse_rl.train --train_for_seconds=360000000 --algo=APPO --gamma=0.997 --use_rnn=True --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4',
_params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('voxel_env_v114_env_v52', experiments=[_experiment])
RUN_DESCRIPTION = RunDescription('megaverse_v114_env_v52', experiments=[_experiment])
5 changes: 3 additions & 2 deletions megaverse_rl/runs/voxel_multi_agent.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from sample_factory.runner.run_description import RunDescription
from sample_factory.runner.runs.voxel_base_experiments import EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS

RUN_DESCRIPTION = RunDescription('voxel_env_v115_multi_agent_v55', experiments=[EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS])
from megaverse_rl.runs.megaverse_base_experiments import EXPERIMENT_4AGENTS, EXPERIMENT_2AGENTS

RUN_DESCRIPTION = RunDescription('megaverse_v115_multi_agent_v55', experiments=[EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS])
10 changes: 5 additions & 5 deletions megaverse_rl/runs/voxel_multitask.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_params = ParamGrid([
('env', ['voxel_env_multitask_voxelworld8']),
('env', ['megaverse_multitask_megaverse8']),
('use_cpc', ['True']),
('seed', [11111, 22222, 33333, 44444, 55555]),
])

_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30 --pbt_mix_policies_in_one_env=False'
_cli = 'python -m megaverse_rl.train --train_for_seconds=360000000 --train_for_env_steps=2000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --megaverse_num_simulation_threads=1 --megaverse_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=1 --reward_clip=30 --pbt_mix_policies_in_one_env=False'

EXPERIMENT_1AGENT = Experiment(
'voxel_env_multitask_obs',
_cli + ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
'megaverse_multitask_obs',
_cli + ' --megaverse_num_envs_per_instance=36 --megaverse_num_agents_per_env=1',
_params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('voxel_env_v115_multitask8_v55', experiments=[EXPERIMENT_1AGENT])
RUN_DESCRIPTION = RunDescription('megaverse_v115_multitask8_v55', experiments=[EXPERIMENT_1AGENT])
Loading

0 comments on commit 5084ea2

Please sign in to comment.