diff --git a/README.md b/README.md
index f766676..0eb8e19 100644
--- a/README.md
+++ b/README.md
@@ -1,23 +1,23 @@
# rlenvs
-Reinforcement learning environments for Torch7, inspired by RL-Glue [[1]](#references). Supported environments:
-
-- rlenvs.Acrobot [[2]](#references)
-- rlenvs.Atari (Arcade Learning Environment)\* [[3]](#references)
-- rlenvs.Blackjack [[4]](#references)
-- rlenvs.CartPole [[5]](#references)
-- rlenvs.Catch [[6]](#references)
-- rlenvs.CliffWalking [[7]](#references)
-- rlenvs.DynaMaze [[8]](#references)
-- rlenvs.GridWorld [[9]](#references)
-- rlenvs.JacksCarRental [[7]](#references)
-- rlenvs.Minecraft (Project Malmö)\* [[10]](#references)
-- rlenvs.MountainCar [[11]](#references)
-- rlenvs.MultiArmedBandit [[12, 13]](#references)
-- rlenvs.RandomWalk [[14]](#references)
-- rlenvs.Taxi [[15]](#references)
-- rlenvs.WindyWorld [[7]](#references)
-- rlenvs.XOWorld [[16]](#references)
+Reinforcement learning environments for Torch7, inspired by [RL-Glue](http://glue.rl-community.org/wiki/Main_Page) [[1]](#references) and conforming to the [OpenAI Gym API](https://gym.openai.com/docs) [[2]](#references). Supported environments:
+
+- rlenvs.Acrobot [[3]](#references)
+- rlenvs.Atari (Arcade Learning Environment)\* [[4]](#references)
+- rlenvs.Blackjack [[5]](#references)
+- rlenvs.CartPole [[6]](#references)
+- rlenvs.Catch [[7]](#references)
+- rlenvs.CliffWalking [[8]](#references)
+- rlenvs.DynaMaze [[9]](#references)
+- rlenvs.GridWorld [[10]](#references)
+- rlenvs.JacksCarRental [[8]](#references)
+- rlenvs.Minecraft (Project Malmö)\* [[11]](#references)
+- rlenvs.MountainCar [[12]](#references)
+- rlenvs.MultiArmedBandit [[13, 14]](#references)
+- rlenvs.RandomWalk [[15]](#references)
+- rlenvs.Taxi [[16]](#references)
+- rlenvs.WindyWorld [[8]](#references)
+- rlenvs.XOWorld [[17]](#references)
Run `th experiment.lua` (or `qlua experiment.lua`) to run a demo of a random agent playing Catch.
@@ -44,10 +44,11 @@ Requires a [supported](https://github.com/Kaixhin/Atari/blob/master/roms/README.
luarocks install luasocket
```
-Requires [Malmö](https://github.com/Microsoft/malmo) (includes Minecraft), extracted with directory name `MalmoPlatform`. `libMalmoLua.so` should be added to `LUA_CPATH`. For example, if `MalmoPlatform` is in your home directory, add the following to the end of your `~/.bashrc`:
+Requires [Malmö](https://github.com/Microsoft/malmo) (includes Minecraft), extracted with directory name `MalmoPlatform`. `libMalmoLua.so` should be added to `LUA_CPATH`, and the path containing the level schemas should be exported as `MALMO_XSD_PATH`. For example, if `MalmoPlatform` is in `/home/username`, add the following to the end of your `~/.bashrc`:
```sh
-export LUA_CPATH=~/MalmoPlatform/Torch_Examples/libMalmoLua.so;$LUA_CPATH
+export LUA_CPATH='/home/username/MalmoPlatform/Torch_Examples/?.so;'$LUA_CPATH
+export MALMO_XSD_PATH=/home/username/MalmoPlatform
```
The Malmö client (`launchClient.sh`) must be operating to run.
@@ -66,15 +67,21 @@ local observation = env:start()
**Note that the API is under development and may be subject to change**
+### rlenvs.envs
+
+A table of all environments available in `rlenvs`.
+
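+For example, a minimal sketch that prints the names of the environments which loaded successfully (environments whose optional dependencies are missing are removed from the table):
+
+```lua
+local rlenvs = require 'rlenvs'
+
+for _, name in ipairs(rlenvs.envs) do
+  print(name)
+end
+```
+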
### observation = env:start([opts])
-Starts a new episode in the environment and returns the first `observation`. May take `opts`.
+Starts a new episode in the environment and returns the first `observation`. May take `opts`.
+Note that environment subclasses implement this internally as `_start`; `start` itself is provided by the `Env` base class.
### reward, observation, terminal, [actionTaken] = env:step(action)
-Performs a step in the environment using `action` (which may be a list - see below), and returns the `reward`, the `observation` of the state transitioned to, and a `terminal` flag. Optionally provides `actionTaken`, if the environment provides supervision in the form of the actual action taken by the agent in spite of the provided action.
+Performs a step in the environment using `action` (which may be a list - see below), and returns the `reward`, the `observation` of the state transitioned to, and a `terminal` flag. Optionally provides `actionTaken`, if the environment provides supervision in the form of the actual action taken by the agent in spite of the provided action.
+Note that environment subclasses implement this internally as `_step`; `step` itself is provided by the `Env` base class and enforces the per-episode time step limit.
-### stateSpec = env:getStateSpec()
+### stateSpace = env:getStateSpace()
Returns a state specification as a list with 3 elements:
@@ -86,11 +93,11 @@ Returns a state specification as a list with 3 elements:
If several states are returned, `stateSpec` is itself a list of state specifications. Ranges may use `nil` if unknown.
-### actionSpec = env:getActionSpec()
+### actionSpace = env:getActionSpace()
Returns an action specification, with the same structure as used for state specifications.
-### minReward, maxReward = env:getRewardSpec()
+### minReward, maxReward = env:getRewardSpace()
Returns the minimum and maximum rewards produced by the environment. Values may be `nil` if unknown.
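+
+For example, a converted environment such as CartPole returns values along these lines (a sketch based on `rlenvs/CartPole.lua` in this patch; the exact fields depend on the space type):
+
+```lua
+local rlenvs = require 'rlenvs'
+local env = rlenvs.CartPole()
+
+local stateSpace = env:getStateSpace()   -- e.g. {name = 'Box', shape = {4}, low = {...}, high = {...}}
+local actionSpace = env:getActionSpace() -- e.g. {name = 'Discrete', n = 2}
+local minReward, maxReward = env:getRewardSpace()
+```
+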
@@ -114,6 +121,10 @@ Returns an RGB display specification, with the same structure as used for state
Returns a RGB display tensor for visualising the state of the environment. Note that this may not be the same as the state provided for the agent.
+### env:render()
+
+Displays the environment using `image`. Requires the environment to have been constructed with `render = true`, the code to be run with `qlua` (rather than `th`), and `getDisplay` to be implemented by the environment.
+
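+For example, a short sketch mirroring `experiment.lua` (run with `qlua`):
+
+```lua
+local Catch = require 'rlenvs.Catch'
+local env = Catch({level = 2, render = true, zoom = 10})
+
+local observation = env:start()
+env:render()
+```
+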
## Development
Environments must inherit from `Env` and therefore implement the above methods (as well as a constructor). `experiment.lua` can be easily adapted for testing different environments. New environments should be added to `rlenvs/init.lua`, `rocks/rlenvs-scm-1.rockspec`, and be listed in this readme with an appropriate reference. For an example of a more complex environment that will only be installed if its optional dependencies are satisfied, see `rlenvs/Atari.lua`.
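+
+As a starting point, the following is a minimal sketch of a hypothetical `CoinFlip` environment, following the `_start`/`_step` and Space conventions introduced above; it is illustrative only and not part of the package:
+
+```lua
+local classic = require 'classic'
+local Env = require 'rlenvs/Env'
+
+local CoinFlip, super = classic.class('CoinFlip', Env)
+
+-- Constructor
+function CoinFlip:_init(opts)
+  opts = opts or {}
+  super._init(self, opts)
+end
+
+-- Single binary observation: the current coin face
+function CoinFlip:getStateSpace()
+  return {name = 'Discrete', n = 2}
+end
+
+-- 2 actions: guess heads (0) or tails (1)
+function CoinFlip:getActionSpace()
+  return {name = 'Discrete', n = 2}
+end
+
+-- Min and max reward
+function CoinFlip:getRewardSpace()
+  return 0, 1
+end
+
+-- Flips the coin for the first observation
+function CoinFlip:_start()
+  self.coin = torch.random(0, 1)
+  return self.coin
+end
+
+-- Rewards a correct guess; episodes are cut off by the Env base class after maxSteps
+function CoinFlip:_step(action)
+  local reward = action == self.coin and 1 or 0
+  self.coin = torch.random(0, 1)
+  return reward, self.coin, false
+end
+
+return CoinFlip
+```
+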
@@ -121,18 +132,19 @@ Environments must inherit from `Env` and therefore implement the above methods (
## References
[1] Tanner, B., & White, A. (2009). RL-Glue: Language-independent software for reinforcement-learning experiments. *The Journal of Machine Learning Research, 10*, 2133-2136.
-[2] DeJong, G., & Spong, M. W. (1994, June). Swinging up the acrobot: An example of intelligent control. In *American Control Conference, 1994* (Vol. 2, pp. 2158-2162). IEEE.
-[3] Bellemare, M. G., Naddaf, Y., Veness, J., & Bowling, M. (2012). The arcade learning environment. *J. Artificial Intelligence Res, 47*, 253-279.
-[4] Pérez-Uribe, A., & Sanchez, E. (1998, May). Blackjack as a test bed for learning strategies in neural networks. In *Neural Networks Proceedings, 1998. IEEE World Congress on Computational Intelligence. The 1998 IEEE International Joint Conference on* (Vol. 3, pp. 2022-2027). IEEE.
-[5] Barto, A. G., Sutton, R. S., & Anderson, C. W. (1983). Neuronlike adaptive elements that can solve difficult learning control problems. *Systems, Man and Cybernetics, IEEE Transactions on*, (5), 834-846.
-[6] Mnih, V., Heess, N., & Graves, A. (2014). Recurrent models of visual attention. In *Advances in Neural Information Processing Systems* (pp. 2204-2212).
-[7] Sutton, R. S., & Barto, A. G. (1998). *Reinforcement learning: An introduction* (Vol. 1, No. 1). Cambridge: MIT press.
-[8] Sutton, R. S. (1990). Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In *Proceedings of the seventh international conference on machine learning* (pp. 216-224).
-[9] Boyan, J., & Moore, A. W. (1995). Generalization in reinforcement learning: Safely approximating the value function. *Advances in neural information processing systems*, 369-376.
-[10] Johnson, M., Hofmann, K., Hutton, T., & Bignell, D. (2016). The Malmo platform for artificial intelligence experimentation. In *International joint conference on artificial intelligence (IJCAI)*.
-[11] Singh, S. P., & Sutton, R. S. (1996). Reinforcement learning with replacing eligibility traces. *Machine learning, 22*(1-3), 123-158.
-[12] Robbins, H. (1985). Some aspects of the sequential design of experiments. In *Herbert Robbins Selected Papers* (pp. 169-177). Springer New York.
-[13] Whittle, P. (1988). Restless bandits: Activity allocation in a changing world. *Journal of applied probability*, 287-298.
-[14] Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. *Machine learning, 3*(1), 9-44.
-[15] Dietterich, T. G. (2000). Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition. In *Journal of Artificial Intelligence Research*.
-[16] Garnelo, M., Arulkumaran, K., & Shanahan, M. (2016). Towards Deep Symbolic Reinforcement Learning. *arXiv preprint arXiv:1609.05518*.
+[2] Brockman, G., Cheung, V., Pettersson, L., Schneider, J., Schulman, J., Tang, J., & Zaremba, W. (2016). OpenAI Gym. *arXiv preprint arXiv:1606.01540*.
+[3] DeJong, G., & Spong, M. W. (1994, June). Swinging up the acrobot: An example of intelligent control. In *American Control Conference, 1994* (Vol. 2, pp. 2158-2162). IEEE.
+[4] Bellemare, M. G., Naddaf, Y., Veness, J., & Bowling, M. (2012). The arcade learning environment. *Journal of Artificial Intelligence Research, 47*, 253-279.
+[5] Pérez-Uribe, A., & Sanchez, E. (1998, May). Blackjack as a test bed for learning strategies in neural networks. In *Neural Networks Proceedings, 1998. IEEE World Congress on Computational Intelligence. The 1998 IEEE International Joint Conference on* (Vol. 3, pp. 2022-2027). IEEE.
+[6] Barto, A. G., Sutton, R. S., & Anderson, C. W. (1983). Neuronlike adaptive elements that can solve difficult learning control problems. *Systems, Man and Cybernetics, IEEE Transactions on*, (5), 834-846.
+[7] Mnih, V., Heess, N., & Graves, A. (2014). Recurrent models of visual attention. In *Advances in Neural Information Processing Systems* (pp. 2204-2212).
+[8] Sutton, R. S., & Barto, A. G. (1998). *Reinforcement learning: An introduction* (Vol. 1, No. 1). Cambridge: MIT press.
+[9] Sutton, R. S. (1990). Integrated architectures for learning, planning, and reacting based on approximating dynamic programming. In *Proceedings of the Seventh International Conference on Machine Learning* (pp. 216-224).
+[10] Boyan, J., & Moore, A. W. (1995). Generalization in reinforcement learning: Safely approximating the value function. *Advances in Neural Information Processing Systems*, 369-376.
+[11] Johnson, M., Hofmann, K., Hutton, T., & Bignell, D. (2016). The Malmo platform for artificial intelligence experimentation. In *International Joint Conference on Artificial Intelligence*.
+[12] Singh, S. P., & Sutton, R. S. (1996). Reinforcement learning with replacing eligibility traces. *Machine Learning, 22*(1-3), 123-158.
+[13] Robbins, H. (1985). Some aspects of the sequential design of experiments. In *Herbert Robbins Selected Papers* (pp. 169-177). Springer New York.
+[14] Whittle, P. (1988). Restless bandits: Activity allocation in a changing world. *Journal of Applied Probability*, 287-298.
+[15] Sutton, R. S. (1988). Learning to predict by the methods of temporal differences. *Machine Learning, 3*(1), 9-44.
+[16] Dietterich, T. G. (2000). Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition. *Journal of Artificial Intelligence Research, 13*, 227-303.
+[17] Garnelo, M., Arulkumaran, K., & Shanahan, M. (2016). Towards Deep Symbolic Reinforcement Learning. In *Workshop on Deep Reinforcement Learning, NIPS 2016*.
diff --git a/experiment.lua b/experiment.lua
index a184662..fabd910 100644
--- a/experiment.lua
+++ b/experiment.lua
@@ -1,38 +1,31 @@
-local image = require 'image'
-local Catch = require 'rlenvs/Catch'
-
--- Detect QT for image display
-local qt = pcall(require, 'qt')
+local Catch = require 'rlenvs.Catch'
-- Initialise and start environment
-local env = Catch({level = 2})
-local stateSpec = env:getStateSpec()
-local actionSpec = env:getActionSpec()
+local env = Catch({level = 2, render = true, zoom = 10})
+local actionSpace = env:getActionSpace()
local observation = env:start()
-local reward, terminal
+local reward, terminal = 0, false
local episodes, totalReward = 0, 0
-local nSteps = 1000 * (stateSpec[2][2] - 1) -- Run for 1000 episodes
+local nEpisodes = 1000
-- Display
-local window = qt and image.display({image=observation, zoom=10})
+env:render()
-for i = 1, nSteps do
- -- Pick random action and execute it
- local action = torch.random(actionSpec[3][1], actionSpec[3][2])
- reward, observation, terminal = env:step(action)
- totalReward = totalReward + reward
+for i = 1, nEpisodes do
+ while not terminal do
+ -- Pick random action and execute it
+ local action = torch.random(0, actionSpace['n'] - 1)
+ reward, observation, terminal = env:step(action)
+ totalReward = totalReward + reward
- -- Display
- if qt then
- image.display({image=observation, zoom=10, win=window})
+ -- Display
+ env:render()
end
- -- If game finished, start again
- if terminal then
- episodes = episodes + 1
- observation = env:start()
- end
+ episodes = episodes + 1
+ observation = env:start()
+ terminal = false
end
print('Episodes: ' .. episodes)
print('Total Reward: ' .. totalReward)
diff --git a/rlenvs/Acrobot.lua b/rlenvs/Acrobot.lua
index 38af8ff..2d8d693 100644
--- a/rlenvs/Acrobot.lua
+++ b/rlenvs/Acrobot.lua
@@ -1,11 +1,14 @@
local classic = require 'classic'
local Acrobot, super = classic.class('Acrobot', Env)
+Acrobot.timeStepLimit = 500
-- Constructor
function Acrobot:_init(opts)
opts = opts or {}
-
+ opts.timeStepLimit = Acrobot.timeStepLimit
+ super._init(self, opts)
+
-- Constants
self.g = opts.g or 9.8
self.m1 = opts.m1 or 1 -- Mass of link 1
@@ -21,27 +24,40 @@ function Acrobot:_init(opts)
end
-- 4 states returned, of type 'real', of dimensionality 1, with differing ranges
-function Acrobot:getStateSpec()
- return {
- {'real', 1, {-math.pi, math.pi}}, -- Joint 1 angle
- {'real', 1, {-math.pi, math.pi}}, -- Joint 2 angle
- {'real', 1, {-4*math.pi, 4*math.pi}}, -- Joint 1 angular velocity
- {'real', 1, {-9*math.pi, 9*math.pi}} -- Joint 2 angular velocity
+function Acrobot:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {4}
+ state['low'] = {
+ -math.pi, -- Joint 1 angle
+ -math.pi, -- Joint 2 angle
+ -4 * math.pi, -- Joint 1 angular velocity
+ -9 * math.pi -- Joint 2 angular velocity
+ }
+ state['high'] = {
+ math.pi, -- Joint 1 angle
+ math.pi, -- Joint 2 angle
+ 4 * math.pi, -- Joint 1 angular velocity
+ 9 * math.pi -- Joint 2 angular velocity
}
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, with second torque joint in {-1, 0, 1}
-function Acrobot:getActionSpec()
- return {'int', 1, {-1, 1}}
+function Acrobot:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 3
+ return action
end
-- Min and max reward
-function Acrobot:getRewardSpec()
+function Acrobot:getRewardSpace()
return -1, 0
end
-- Resets the cart
-function Acrobot:start()
+function Acrobot:_start()
-- Reset angles and velocities
self.q1 = 0 -- Joint 1 angle
self.q2 = 0 -- Joint 2 angle
@@ -52,20 +68,19 @@ function Acrobot:start()
end
-- Swings the pole via torque on second joint
-function Acrobot:step(action)
+function Acrobot:_step(action)
+ action = action - 1 -- rescale the action
local reward = -1
local terminal = false
for t = 1, self.steps do
-- Calculate motion of system
- local d1 = self.m1*math.pow(self.lc1, 2) + self.m2*(math.pow(self.l1, 2) + math.pow(self.lc2, 2) + 2*self.l1*self.lc2*math.cos(self.q2)) + self.I1 + self.I2
- local d2 = self.m2*(math.pow(self.lc2, 2) + self.l1*self.lc2*math.cos(self.q2)) + self.I2
- local phi2 = self.m2*self.lc2*self.g*math.cos(self.q1 + self.q2 - math.pi/2)
- local phi1 = -self.m2*self.l1*self.lc2*math.pow(self.q2Dot, 2)*math.sin(self.q2) - 2*self.m2*self.l1*self.lc2*self.q2Dot*self.q1Dot*math.sin(self.q2) +
- (self.m1*self.lc1 + self.m2*self.l1)*self.g*math.cos(self.q1 - math.pi/2) + phi2
- local q2DotDot = (action + d2/d1*phi1 - self.m2*self.l1*self.lc2*math.pow(self.q1Dot, 2)*math.sin(self.q2) - phi2) /
- (self.m2*math.pow(self.lc2, 2) + self.I2 - math.pow(d2, 2)/d1)
- local q1DotDot = -(d2/q2DotDot + phi1)/d1
+ local d1 = self.m1 * math.pow(self.lc1, 2) + self.m2 * (math.pow(self.l1, 2) + math.pow(self.lc2, 2) + 2 * self.l1 * self.lc2 * math.cos(self.q2)) + self.I1 + self.I2
+ local d2 = self.m2 * (math.pow(self.lc2, 2) + self.l1 * self.lc2 * math.cos(self.q2)) + self.I2
+ local phi2 = self.m2 * self.lc2 * self.g * math.cos(self.q1 + self.q2 - math.pi/2)
+ local phi1 = -self.m2 * self.l1 * self.lc2 * math.pow(self.q2Dot, 2) * math.sin(self.q2) - 2 * self.m2 * self.l1 * self.lc2 * self.q2Dot * self.q1Dot * math.sin(self.q2) + (self.m1 * self.lc1 + self.m2 * self.l1) * self.g * math.cos(self.q1 - math.pi / 2) + phi2
+ local q2DotDot = (action + d2 / d1 * phi1 - self.m2 * self.l1 * self.lc2 * math.pow(self.q1Dot, 2) * math.sin(self.q2) - phi2) / (self.m2 * math.pow(self.lc2, 2) + self.I2 - math.pow(d2, 2) / d1)
+ local q1DotDot = -(d2 * q2DotDot + phi1) / d1
-- Update state using Euler's method
self.q1Dot = self.q1Dot + self.tau * q1DotDot
@@ -86,13 +101,13 @@ function Acrobot:step(action)
self.q2 = math.pi - (self.q2 % -math.pi)
end
-- Limit velocities
- self.q1Dot = math.max(self.q1Dot, -4*math.pi)
- self.q1Dot = math.min(self.q1Dot, 4*math.pi)
- self.q2Dot = math.max(self.q2Dot, -9*math.pi)
- self.q2Dot = math.min(self.q2Dot, 9*math.pi)
+ self.q1Dot = math.max(self.q1Dot, -4 * math.pi)
+ self.q1Dot = math.min(self.q1Dot, 4 * math.pi)
+ self.q2Dot = math.max(self.q2Dot, -9 * math.pi)
+ self.q2Dot = math.min(self.q2Dot, 9 * math.pi)
-- Terminate if second joint's height is greater than height of first joint (relative to origin)
- local h = -self.l1*math.cos(self.q1) - self.l2*math.sin(math.pi/2 - self.q1 - self.q2)
+ local h = -self.l1 * math.cos(self.q1) - self.l2 * math.sin(math.pi / 2 - self.q1 - self.q2)
if h > self.l1 then
reward = 0
terminal = true
diff --git a/rlenvs/Atari.lua b/rlenvs/Atari.lua
index 83e1ea8..23cb39c 100644
--- a/rlenvs/Atari.lua
+++ b/rlenvs/Atari.lua
@@ -6,11 +6,15 @@ if not hasALEWrap then
end
local Atari, super = classic.class('Atari', Env)
+Atari.timeStepLimit = 100000
-- Constructor
function Atari:_init(opts)
-- Create ALEWrap options from opts
opts = opts or {}
+ opts.timeStepLimit = Atari.timeStepLimit
+ super._init(self, opts)
+
if opts.lifeLossTerminal == nil then
opts.lifeLossTerminal = true
end
@@ -44,13 +48,25 @@ function Atari:_init(opts)
end
-- 1 state returned, of type 'real', of dimensionality 3 x 210 x 160, between 0 and 1
-function Atari:getStateSpec()
- return {'real', {3, 210, 160}, {0, 1}}
+function Atari:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {3, 210, 160}
+ state['low'] = {
+ 0
+ }
+ state['high'] = {
+ 1
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 1 and 18 (max)
-function Atari:getActionSpec()
- return {'int', 1, {1, #self.actions}}
+function Atari:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = #self.actions
+ return action
end
-- RGB screen of height 210 and width 160
@@ -59,12 +75,12 @@ function Atari:getDisplaySpec()
end
-- Min and max reward (unknown)
-function Atari:getRewardSpec()
+function Atari:getRewardSpace()
return nil, nil
end
-- Starts a new game, possibly with a random number of no-ops
-function Atari:start()
+function Atari:_start()
local screen, reward, terminal
if self.gameEnv._random_starts > 0 then
@@ -77,7 +93,7 @@ function Atari:start()
end
-- Steps in a game
-function Atari:step(action)
+function Atari:_step(action)
-- Map action index to action for game
action = self.actions[action]
diff --git a/rlenvs/Blackjack.lua b/rlenvs/Blackjack.lua
index 7d436c4..b174455 100644
--- a/rlenvs/Blackjack.lua
+++ b/rlenvs/Blackjack.lua
@@ -7,31 +7,45 @@ local Blackjack, super = classic.class('Blackjack', Env)
function Blackjack:_init(opts)
opts = opts or {}
+ super._init(self, opts)
+
-- Create number-only suit
self.suit = torch.Tensor({2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 11})
end
-- 2 states returned, of type 'int', of dimensionality 1, for the player sum, dealer's showing card, and player-usable ace
-function Blackjack:getStateSpec()
- return {
- {'int', 1, {2, 20}},
- {'int', 1, {1, 10}},
- {'int', 1, {0, 1}}
+function Blackjack:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {3}
+ state['low'] = {
+ 2,
+ 1,
+ 0
+ }
+ state['high'] = {
+ 20,
+ 10,
+ 1
}
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, either stand or hit
-function Blackjack:getActionSpec()
- return {'int', 1, {0, 1}}
+function Blackjack:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 2
+ return action
end
-- Min and max reward
-function Blackjack:getRewardSpec()
+function Blackjack:getRewardSpace()
return -1, 1
end
-- Draw 2 cards for player and dealer
-function Blackjack:start()
+function Blackjack:_start()
-- Shuffle deck
self.deck = torch.cat({self.suit, self.suit, self.suit, self.suit}, 1):index(1, torch.randperm(52):long())
@@ -51,7 +65,7 @@ function Blackjack:start()
end
-- Player stands or hits
-function Blackjack:step(action)
+function Blackjack:_step(action)
local reward = 0
local terminal = false
diff --git a/rlenvs/CartPole.lua b/rlenvs/CartPole.lua
index 5b1e49b..4a99613 100644
--- a/rlenvs/CartPole.lua
+++ b/rlenvs/CartPole.lua
@@ -1,11 +1,14 @@
local classic = require 'classic'
local CartPole, super = classic.class('CartPole', Env)
+CartPole.timeStepLimit = 200
-- Constructor
function CartPole:_init(opts)
opts = opts or {}
-
+ opts.timeStepLimit = CartPole.timeStepLimit
+ super._init(self, opts)
+
-- Constants
self.gravity = opts.gravity or 9.8
self.cartMass = opts.cartMass or 1.0
@@ -19,27 +22,40 @@ function CartPole:_init(opts)
end
-- 4 states returned, of type 'real', of dimensionality 1, with differing ranges
-function CartPole:getStateSpec()
- return {
- {'real', 1, {-2.4, 2.4}}, -- Cart position
- {'real', 1, {nil, nil}}, -- Cart velocity
- {'real', 1, {math.rad(-12), math.rad(12)}}, -- Pole angle
- {'real', 1, {nil, nil}} -- Pole angular velocity
+function CartPole:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {4}
+ state['low'] = {
+ -2.4, -- Cart position
+ -math.huge, -- Cart velocity
+ math.rad(-12), -- Pole angle
+ -math.huge -- Pole angular velocity
+ }
+ state['high'] = {
+ 2.4, -- Cart position
+ math.huge, -- Cart velocity
+ math.rad(12), -- Pole angle
+ math.huge -- Pole angular velocity
}
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 0 and 1 (left, right)
-function CartPole:getActionSpec()
- return {'int', 1, {0, 1}}
+function CartPole:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 2
+ return action
end
-- Min and max reward
-function CartPole:getRewardSpec()
+function CartPole:getRewardSpace()
- return -1, 0
+ return 0, 1
end
-- Resets the cart
-function CartPole:start()
+function CartPole:_start()
-- Reset position, angle and velocities
self.x = 0 -- Cart position (m)
self.xDot = 0 -- Cart velocity
@@ -50,14 +66,14 @@ function CartPole:start()
end
-- Drives the cart
-function CartPole:step(action)
+function CartPole:_step(action)
-- Calculate acceleration
local force = action == 1 and self.forceMagnitude or -self.forceMagnitude
local cosTheta = math.cos(self.theta)
local sinTheta = math.sin(self.theta)
- local temp = (force + 0.5*self.poleMassLength * math.pow(self.thetaDot, 2) * sinTheta) / self.totalMass
- local thetaDotDot = (self.gravity * sinTheta - cosTheta * temp) / (0.5*self.poleLength * (4/3 - self.poleMass * math.pow(cosTheta, 2) / self.totalMass))
- local xDotDot = temp - 0.5*self.poleMassLength * thetaDotDot * cosTheta / self.totalMass
+ local temp = (force + 0.5 * self.poleMassLength * math.pow(self.thetaDot, 2) * sinTheta) / self.totalMass
+ local thetaDotDot = (self.gravity * sinTheta - cosTheta * temp) / (0.5 * self.poleLength * (4 / 3 - self.poleMass * math.pow(cosTheta, 2) / self.totalMass))
+ local xDotDot = temp - 0.5 * self.poleMassLength * thetaDotDot * cosTheta / self.totalMass
-- Update state using Euler's method
self.x = self.x + self.tau * self.xDot
@@ -66,10 +82,10 @@ function CartPole:step(action)
self.thetaDot = self.thetaDot + self.tau * thetaDotDot
-- Check failure (if cart reaches sides of track/pole tips too much)
- local reward = 0
+ local reward = 1
local terminal = false
if self.x < -2.4 or self.x > 2.4 or self.theta < math.rad(-12) or self.theta > math.rad(12) then
- reward = -1
+ reward = 0
terminal = true
end
diff --git a/rlenvs/Catch.lua b/rlenvs/Catch.lua
index 40ba990..4fc87e0 100644
--- a/rlenvs/Catch.lua
+++ b/rlenvs/Catch.lua
@@ -1,10 +1,12 @@
local classic = require 'classic'
+local Env = require 'rlenvs/Env'
local Catch, super = classic.class('Catch', Env)
-- Constructor
function Catch:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Difficulty level
self.level = opts.level or 2
@@ -28,13 +30,25 @@ function Catch:_init(opts)
end
-- 1 state returned, of type 'int', of dimensionality 1 x self.size x self.size, between 0 and 1
-function Catch:getStateSpec()
- return {'int', {1, self.size, self.size}, {0, 1}}
+function Catch:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {1, self.size, self.size}
+ state['low'] = {
+ 0
+ }
+ state['high'] = {
+ 1
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 0 and 2
-function Catch:getActionSpec()
- return {'int', 1, {0, 2}}
+function Catch:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 3
+ return action
end
-- RGB screen of size self.size x self.size
@@ -43,7 +57,7 @@ function Catch:getDisplaySpec()
end
-- Min and max reward
-function Catch:getRewardSpec()
+function Catch:getRewardSpace()
return 0, 1
end
@@ -64,7 +78,7 @@ function Catch:redraw()
end
-- Starts new game
-function Catch:start()
+function Catch:_start()
-- Reset player and ball
self.player.x = math.ceil(self.size / 2)
self.ball.x = torch.random(self.size)
@@ -80,7 +94,7 @@ function Catch:start()
end
-- Steps in a game
-function Catch:step(action)
+function Catch:_step(action)
-- Reward is 0 by default
local reward = 0
diff --git a/rlenvs/CliffWalking.lua b/rlenvs/CliffWalking.lua
index 41d029f..4200e4f 100644
--- a/rlenvs/CliffWalking.lua
+++ b/rlenvs/CliffWalking.lua
@@ -5,35 +5,48 @@ local CliffWalking, super = classic.class('CliffWalking', Env)
-- Constructor
function CliffWalking:_init(opts)
opts = opts or {}
+ super._init(self, opts)
end
-- 2 states returned, of type 'int', of dimensionality 1, where x is 1-12 and y is 1-4
-function CliffWalking:getStateSpec()
- return {
- {'int', 1, {1, 12}}, -- x
- {'int', 1, {1, 4}} -- y
+function CliffWalking:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ 1, -- x
+ 1 -- y
}
+ state['high'] = {
+ 12, -- x
+ 4 -- y
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 1 and 4 (up|right|down|left)
-function CliffWalking:getActionSpec()
- return {'int', 1, {1, 4}}
+function CliffWalking:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 4
+ return action
end
-- Min and max reward
-function CliffWalking:getRewardSpec()
+function CliffWalking:getRewardSpace()
return -100, -1
end
-- Reset position
-function CliffWalking:start()
+function CliffWalking:_start()
self.position = {1, 1}
return self.position
end
-- Move up, right, down or left
-function CliffWalking:step(action)
+function CliffWalking:_step(action)
+ action = action + 1 -- scale action
local reward = -1
local terminal = false
diff --git a/rlenvs/DynaMaze.lua b/rlenvs/DynaMaze.lua
index 42faaf0..c1b9ddb 100644
--- a/rlenvs/DynaMaze.lua
+++ b/rlenvs/DynaMaze.lua
@@ -5,6 +5,7 @@ local DynaMaze, super = classic.class('DynaMaze', Env)
-- Constructor
function DynaMaze:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Set change: none|blocking|shortcut
self.change = opts.change or 'none'
@@ -27,25 +28,36 @@ function DynaMaze:_init(opts)
end
-- 2 states returned, of type 'int', of dimensionality 1, where x is 1-9 and y is 1-6
-function DynaMaze:getStateSpec()
- return {
- {'int', 1, {1, 9}}, -- x
- {'int', 1, {1, 6}} -- y
+function DynaMaze:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ 1, -- x
+ 1 -- y
}
+ state['high'] = {
+ 9, -- x
+ 6 -- y
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 1 and 4
-function DynaMaze:getActionSpec()
- return {'int', 1, {1, 4}}
+function DynaMaze:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 4
+ return action
end
-- Min and max reward
-function DynaMaze:getRewardSpec()
+function DynaMaze:getRewardSpace()
return 0, 1
end
-- Reset position
-function DynaMaze:start()
+function DynaMaze:_start()
if self.change == 'none' then
self.position = {1, 4}
else
@@ -56,7 +68,8 @@ function DynaMaze:start()
end
-- Move up, right, down or left
-function DynaMaze:step(action)
+function DynaMaze:_step(action)
+ action = action + 1 -- scale action
local reward = 0
local terminal = false
diff --git a/rlenvs/Env.lua b/rlenvs/Env.lua
index 262c5f4..824a31a 100644
--- a/rlenvs/Env.lua
+++ b/rlenvs/Env.lua
@@ -3,10 +3,58 @@ local classic = require 'classic'
local Env = classic.class('Env')
-- Denote interfaces
-Env:mustHave('start')
-Env:mustHave('step')
-Env:mustHave('getStateSpec')
-Env:mustHave('getActionSpec')
-Env:mustHave('getRewardSpec')
+Env:mustHave('_start')
+Env:mustHave('_step')
+Env:mustHave('getStateSpace')
+Env:mustHave('getActionSpace')
+Env:mustHave('getRewardSpace')
+
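+-- Base class constructor: sets the per-episode step limit and the optional rendering (image/qt) settings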
+function Env:_init(opts)
+ -- Set max number of steps per episode (default 1000)
+ if opts.timeStepLimit and opts.maxSteps then
+ self.maxSteps = math.min(opts.timeStepLimit, opts.maxSteps)
+ elseif opts.maxSteps then
+ self.maxSteps = opts.maxSteps
+ elseif opts.timeStepLimit then
+ self.maxSteps = opts.timeStepLimit
+ else
+ self.maxSteps = 1000
+ end
+ self.currentStep = 1
+
+ if opts.render then
+ require 'image'
+ self.qt = pcall(require, 'qt')
+ if not self.qt then
+ print('Was not able to load qt to render, are you using qlua to run the script?')
+ end
+ self.zoom = opts.zoom or 1
+ end
+end
+
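+-- Public step wrapper: delegates to _step and flags terminal once maxSteps steps have been taken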
+function Env:step(action)
+ local reward, state, terminal = self:_step(action)
+
+ if self.currentStep == self.maxSteps then
+ terminal = true
+ self.currentStep = 0
+ end
+ self.currentStep = self.currentStep + 1
+
+ return reward, state, terminal
+end
+
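+-- Public start wrapper: resets the step counter and delegates to _start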
+function Env:start()
+ self.currentStep = 1
+ local state = self:_start()
+ return state
+end
+
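+-- Displays getDisplay() via image when qt is available (i.e. run with qlua) and the environment was constructed with render = true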
+function Env:render()
+ if self.qt and self.getDisplay then
+ self.window = self.window == nil and image.display({image = self:getDisplay(), zoom = self.zoom}) or self.window
+ image.display({image = self:getDisplay(), zoom = self.zoom, win = self.window})
+ end
+end
return Env
diff --git a/rlenvs/GridWorld.lua b/rlenvs/GridWorld.lua
index 66fba69..8bbf2b2 100644
--- a/rlenvs/GridWorld.lua
+++ b/rlenvs/GridWorld.lua
@@ -6,6 +6,7 @@ local GridWorld, super = classic.class('GridWorld', Env)
-- Constructor
function GridWorld:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Cost of moving in world (discretized)
self.world = torch.Tensor(101, 101):fill(-0.5)
@@ -13,38 +14,50 @@ function GridWorld:_init(opts)
-- PuddleWorld
if opts.puddles then
-- Create 2D Gaussians to subtract from world
- self.world[{{30, 90}, {30, 50}}]:csub(image.gaussian({width=21, height=61}))
- self.world[{{60, 80}, {1, 50}}]:csub(image.gaussian({width=60, height=21})[{{}, {11, 60}}])
+ self.world[{{30, 90}, {30, 50}}]:csub(image.gaussian({width = 21, height = 61}))
+ self.world[{{60, 80}, {1, 50}}]:csub(image.gaussian({width = 60, height = 21})[{{}, {11, 60}}])
end
end
-- 2 states returned, of type 'real', of dimensionality 1, from 0-1
-function GridWorld:getStateSpec()
- return {
- {'real', 1, {0, 1}}, -- x
- {'real', 1, {0, 1}} -- y
+function GridWorld:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ 0, -- x
+ 0 -- y
}
+ state['high'] = {
+ 1, -- x
+ 1 -- y
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 1 and 4
-function GridWorld:getActionSpec()
- return {'int', 1, {1, 4}}
+function GridWorld:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 4
+ return action
end
-- Min and max reward
-function GridWorld:getRewardSpec()
+function GridWorld:getRewardSpace()
return torch.min(self.world), 0
end
-- Reset position
-function GridWorld:start()
+function GridWorld:_start()
self.position = {0.2, 0.4}
return self.position
end
-- Move up, right, down or left
-function GridWorld:step(action)
+function GridWorld:_step(action)
+ action = action + 1 -- scale action
local terminal = false
-- Move
@@ -63,7 +76,7 @@ function GridWorld:step(action)
end
-- Look up cost of moving to position
- local reward = self.world[{{self.position[1]*100+1}, {self.position[2]*100+1}}][1][1]
+ local reward = self.world[{{self.position[1] * 100 + 1}, {self.position[2] * 100 + 1}}][1][1]
-- Check if reached goal
if self.position[1] == 1 and self.position[2] == 1 then
diff --git a/rlenvs/JacksCarRental.lua b/rlenvs/JacksCarRental.lua
index fa9d3a3..a21719c 100644
--- a/rlenvs/JacksCarRental.lua
+++ b/rlenvs/JacksCarRental.lua
@@ -19,28 +19,40 @@ end
-- Constructor
function JacksCarRental:_init(opts)
opts = opts or {}
+ super._init(self, opts)
end
-- 2 states returned, of type 'int', of dimensionality 1, for 0-20 cars
-function JacksCarRental:getStateSpec()
- return {
- {'int', 1, {0, 20}}, -- Lot 1
- {'int', 1, {0, 20}} -- Lot 2
+function JacksCarRental:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ 0, -- Lot 1
+ 0 -- Lot 2
}
+ state['high'] = {
+ 20, -- Lot 1
+ 20 -- Lot 2
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between -5 and 5 (max 5 cars can be moved overnight)
-function JacksCarRental:getActionSpec()
- return {'int', 1, {-5, 5}} -- Negative numbers indicate transferring cars from lot 2 to lot 1
+function JacksCarRental:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 11
+ return action
end
-- Min and max reward
-function JacksCarRental:getRewardSpec()
+function JacksCarRental:getRewardSpace()
return 0, 200
end
-- Resets the cars to 10 at each lot
-function JacksCarRental:start()
+function JacksCarRental:_start()
self.lot1 = 10
self.lot2 = 10
@@ -48,18 +60,19 @@ function JacksCarRental:start()
end
-- Acts out a day and night for Jack's Car Rental
-function JacksCarRental:step(action)
+function JacksCarRental:_step(action)
+ action = action - 5 -- scale action
local reward = 0 -- Reward in $
-- Customers rent cars from lot 1 during the day
local lot1Rentals = math.min(poisson(3), self.lot1)
self.lot1 = self.lot1 - lot1Rentals
- reward = reward + 10*lot1Rentals
+ reward = reward + 10 * lot1Rentals
-- Customers rent cars from lot 2 during the day
local lot2Rentals = math.min(poisson(4), self.lot2)
self.lot2 = self.lot2 - lot2Rentals
- reward = reward + 10*lot2Rentals
+ reward = reward + 10 * lot2Rentals
-- Customers return cars to lot 1 at the end of the day
local lot1Returns = poisson(3)
@@ -77,14 +90,14 @@ function JacksCarRental:step(action)
-- Move cars
self.lot1 = self.lot1 - carsMoved
self.lot2 = self.lot2 + carsMoved
- reward = reward - 2*carsMoved
- elseif action < 0 then
+ reward = reward - 2 * carsMoved
+ elseif action < 0 then -- Negative numbers indicate transferring cars from lot 2 to lot 1
carsMoved = math.min(-action, self.lot2)
carsMoved = math.min(carsMoved, 20 - self.lot1)
-- Move cars
self.lot2 = self.lot2 - carsMoved
self.lot1 = self.lot1 + carsMoved
- reward = reward - 2*carsMoved
+ reward = reward - 2 * carsMoved
end
return reward, {self.lot1, self.lot2}, false
diff --git a/rlenvs/Minecraft.lua b/rlenvs/Minecraft.lua
index 8997462..f31d850 100644
--- a/rlenvs/Minecraft.lua
+++ b/rlenvs/Minecraft.lua
@@ -16,12 +16,14 @@ end
-- Constructor
function Minecraft:_init(opts)
+ opts = opts or {}
+ super._init(self, opts)
-- Check libaMalmoLua is available locally
if not hasLibMalmoLua then
- print("Requires libMalmoLua.so in LUA_CPATH")
+ print("Requires libMalmoLua.so")
os.exit()
end
-
+
opts = opts or {}
self.height = opts.height or 84
self.width = opts.width or 84
[Four hunks here edit the embedded Malmö mission XML string; the XML markup has been stripped from this view of the diff. The recoverable changes: the indentation of the mission XML is normalized, the requested video size changes from 160x160 to 320x240, and one extra line is added near the `attack` command handler section.]
@@ -120,16 +123,28 @@ function Minecraft:_init(opts)
end
-- 2 states returned, of type 'real', of dimensionality 1, from 0-1
-function Minecraft:getStateSpec()
- return {'real', {3, self.height, self.width}, {0, 1}}
+function Minecraft:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {3, self.height, self.width}
+ state['low'] = {
+ 0
+ }
+ state['high'] = {
+ 1
+ }
+ return state
end
-function Minecraft:getActionSpec()
- return {'int', 1, {1, #self.actions}}
+function Minecraft:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = #self.actions
+ return action
end
--- Min and max reward
-function Minecraft:getRewardSpec()
+-- Min and max reward (unknown)
+function Minecraft:getRewardSpace()
return nil, nil
end
@@ -157,8 +172,16 @@ function Minecraft:getRewards(world_rewards)
return proc_rewards
end
--- Reset position
-function Minecraft:start()
+-- Start new mission
+function Minecraft:_start()
+ local world_state = self.agent_host:getWorldState()
+
+ -- check if a previous mission is still running before starting a new one
+ if world_state.is_mission_running then
+ self.agent_host:sendCommand("quit")
+ sleep(0.5)
+ end
+
local mission = MissionSpec(self.mission_xml, true)
local mission_record = MissionRecordSpec()
@@ -173,9 +196,6 @@ function Minecraft:start()
assert(channels == 3, "No RGB video output")
assert(height == self.height or width == self.width, "Video output dimensions don't match those requested")
- -- Set the time limit for mission (in seconds)
- mission:timeLimitInSeconds(self.time_limit)
-
local status, err = pcall(function() self.agent_host:startMission( mission, mission_record ) end)
if not status then
print("Error starting mission: "..err)
@@ -216,7 +236,7 @@ function Minecraft:start()
end
-- Move up, right, down or left
-function Minecraft:step(action)
+function Minecraft:_step(action)
-- Do something
local action = self.actions[action]
self.agent_host:sendCommand(action)
@@ -247,7 +267,7 @@ function Minecraft:step(action)
self.proc_frames = self:processFrames(world_state.video_frames)
end
- local terminal = world_state.is_mission_running
+ local terminal = not world_state.is_mission_running
sleep(0.1)
diff --git a/rlenvs/MountainCar.lua b/rlenvs/MountainCar.lua
index 8d62109..26daec2 100644
--- a/rlenvs/MountainCar.lua
+++ b/rlenvs/MountainCar.lua
@@ -1,32 +1,47 @@
local classic = require 'classic'
local MountainCar, super = classic.class('MountainCar', Env)
+MountainCar.timeStepLimit = 200
-- Constructor
function MountainCar:_init(opts)
opts = opts or {}
+ opts.timeStepLimit = MountainCar.timeStepLimit
+
+ super._init(self, opts)
end
-- 2 states returned, of type 'real', of dimensionality 1, with differing ranges
-function MountainCar:getStateSpec()
- return {
- {'real', 1, {-0.07, 0.07}}, -- Velocity
- {'real', 1, {-1.2, 0.6}} -- Position
+function MountainCar:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ -0.07, -- Velocity
+ -1.2 -- Position
+ }
+ state['high'] = {
+ 0.07, -- Velocity
+ 0.6 -- Position
}
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between -1 and 1 (left, neutral, right)
-function MountainCar:getActionSpec()
- return {'int', 1, {-1, 1}}
+function MountainCar:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 3
+ return action
end
-- Min and max reward
-function MountainCar:getRewardSpec()
+function MountainCar:getRewardSpace()
return -2, 0 -- As height = sin(3x) is between -1 and 1, and reward = height - 1
end
-- Resets the car
-function MountainCar:start()
+function MountainCar:_start()
-- Reset position and velocity
self.position = -0.5
self.velocity = 0
@@ -35,12 +50,13 @@ function MountainCar:start()
end
-- Drives the car
-function MountainCar:step(action)
+function MountainCar:_step(action)
+ action = action - 1 -- scale action
-- Calculate height
local height = math.sin(3*self.position)
-- Update velocity and position
- self.velocity = self.velocity + 0.001*action - 0.0025*math.cos(3*self.position)
+ self.velocity = self.velocity + 0.001 * action - 0.0025 * math.cos(3 * self.position)
self.velocity = math.max(self.velocity, -0.07)
self.velocity = math.min(self.velocity, 0.07)
self.position = self.position + self.velocity
diff --git a/rlenvs/MultiArmedBandit.lua b/rlenvs/MultiArmedBandit.lua
index 9261221..d4e6c75 100644
--- a/rlenvs/MultiArmedBandit.lua
+++ b/rlenvs/MultiArmedBandit.lua
@@ -5,6 +5,7 @@ local MultiArmedBandit, super = classic.class('MultiArmedBandit', Env)
-- Constructor
function MultiArmedBandit:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Restless bandits (with a Gaussian random walk)
self.restless = opts.restless or false
@@ -19,27 +20,31 @@ function MultiArmedBandit:_init(opts)
end
-- No state (not a contextual bandit)
-function MultiArmedBandit:getStateSpec()
+function MultiArmedBandit:getStateSpace()
return nil
end
-- 1 action required, of type 'int', of dimensionality 1, of the number of arms
-function MultiArmedBandit:getActionSpec()
- return {'int', 1, {1, self.nArms}}
+function MultiArmedBandit:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = self.nArms
+ return action
end
-- Min and max rewards unknown when sampling from distributions
-function MultiArmedBandit:getRewardSpec()
+function MultiArmedBandit:getRewardSpace()
return nil, nil
end
-- Does nothing (distributions do not reset)
-function MultiArmedBandit:start()
+function MultiArmedBandit:_start()
return nil
end
-- Pulls an arm
-function MultiArmedBandit:step(action)
+function MultiArmedBandit:_step(action)
+ action = action + 1 -- scale action
-- Sample for reward
local reward = torch.normal(self.armMeans[action], 1)
diff --git a/rlenvs/RandomWalk.lua b/rlenvs/RandomWalk.lua
index 8594330..a1df9b2 100644
--- a/rlenvs/RandomWalk.lua
+++ b/rlenvs/RandomWalk.lua
@@ -5,32 +5,39 @@ local RandomWalk, super = classic.class('RandomWalk', Env)
-- Constructor
function RandomWalk:_init(opts)
opts = opts or {}
+ super._init(self, opts)
end
-- 1 states returned, of type 'int', of dimensionality 1, between 0 and 6 (the terminal states)
-function RandomWalk:getStateSpec()
- return {'int', 1, {0, 6}} -- Position
+function RandomWalk:getStateSpace()
+ local state = {}
+ state['name'] = 'Discrete'
+ state['n'] = 7
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 0 and 1 (left or right)
-function RandomWalk:getActionSpec()
- return {'int', 1, {0, 1}}
+function RandomWalk:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 2
+ return action
end
-- Min and max reward
-function RandomWalk:getRewardSpec()
+function RandomWalk:getRewardSpace()
return 0, 1
end
-- Reset position
-function RandomWalk:start()
+function RandomWalk:_start()
self.position = 3
return self.position
end
-- Move left or right
-function RandomWalk:step(action)
+function RandomWalk:_step(action)
local reward = 0
local terminal = false
diff --git a/rlenvs/Taxi.lua b/rlenvs/Taxi.lua
index 61d0ba5..962b90c 100644
--- a/rlenvs/Taxi.lua
+++ b/rlenvs/Taxi.lua
@@ -12,10 +12,14 @@ local classic = require 'classic'
--]]
local Taxi, super = classic.class('Taxi', Env)
+Taxi.timeStepLimit = 200
-- Constructor
function Taxi:_init(opts)
opts = opts or {}
+ opts.timeStepLimit = Taxi.timeStepLimit
+
+ super._init(self, opts)
-- Passenger positions (Red, Green, Blue, Yellow)
self.rgbyPos = {{0, 4}, {4, 4}, {3, 0}, {0, 0}}
@@ -24,28 +28,42 @@ function Taxi:_init(opts)
end
-- 4 states returned, of type 'int', of dimensionality 1, where x and y are 0-5, fuel is -1-12, passenger position is 1-5 and destination is 1-4
-function Taxi:getStateSpec()
- return {
- {'int', 1, {0, 4}}, -- x
- {'int', 1, {0, 4}}, -- y
- {'int', 1, {-1, 12}}, -- Fuel
- {'int', 1, {1, 5}}, -- Passenger location
- {'int', 1, {1, 4}}, -- Destination TODO: Work out why there are apparently 5 destination states in the original paper
+function Taxi:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {5}
+ state['low'] = {
+ 0, -- x
+ 0, -- y
+ -1, -- Fuel
+ 1, -- Passenger location
+ 1 -- Destination TODO: Work out why there are apparently 5 destination states in the original paper
+ }
+ state['high'] = {
+ 4, -- x
+ 4, -- y
+ 12, -- Fuel
+ 5, -- Passenger location
+ 4 -- Destination
}
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, where 1-4 is move N, E, S, W, 5 is Pickup, 6 is Putdown and 7 is Fillup
-function Taxi:getActionSpec()
- return {'int', 1, {1, 7}}
+function Taxi:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 7
+ return action
end
-- Min and max reward
-function Taxi:getRewardSpec()
+function Taxi:getRewardSpace()
return -20, 20
end
-- Reset position, fuel and passenger
-function Taxi:start()
+function Taxi:_start()
-- Randomise position and fuel
self.position = {torch.random(0, 4), torch.random(0, 4)}
self.fuel = torch.random(5, 12)
@@ -90,7 +108,7 @@ function Taxi:validMove(action)
end
-- Move up, right, down or left
-function Taxi:step(action)
+function Taxi:_step(action)
local reward = -1
local terminal = false
diff --git a/rlenvs/WindyWorld.lua b/rlenvs/WindyWorld.lua
index 9cba841..00fdce1 100644
--- a/rlenvs/WindyWorld.lua
+++ b/rlenvs/WindyWorld.lua
@@ -5,42 +5,56 @@ local WindyWorld, super = classic.class('WindyWorld', Env)
-- Constructor
function WindyWorld:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Allow king's moves (8 directions)
self.king = opts.king or false
end
-- 2 states returned, of type 'int', of dimensionality 1, where x is 1-10 and y is 1-7
-function WindyWorld:getStateSpec()
- return {
- {'int', 1, {1, 10}}, -- x
- {'int', 1, {1, 7}} -- y
+function WindyWorld:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {2}
+ state['low'] = {
+ 1, -- x
+ 1 -- y
}
+ state['high'] = {
+ 10, -- x
+ 7 -- y
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 1 and 4 (for standard) or 1 and 8 (for king)
-function WindyWorld:getActionSpec()
+function WindyWorld:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
if self.king then
- return {'int', 1, {1, 8}}
+ action['n'] = 8
+ return action
else
- return {'int', 1, {1, 4}}
+ action['n'] = 4
+ return action
end
end
-- Min and max reward
-function WindyWorld:getRewardSpec()
+function WindyWorld:getRewardSpace()
return -1, -1
end
-- Reset position
-function WindyWorld:start()
+function WindyWorld:_start()
self.position = {1, 4}
return self.position
end
-- Move up, right, down or left
-function WindyWorld:step(action)
+function WindyWorld:_step(action)
+ action = action + 1 -- scale action
local terminal = false
-- Move
diff --git a/rlenvs/XOWorld.lua b/rlenvs/XOWorld.lua
index 691d61c..72f0ed9 100644
--- a/rlenvs/XOWorld.lua
+++ b/rlenvs/XOWorld.lua
@@ -6,6 +6,7 @@ local XOWorld, super = classic.class('XOWorld', Env)
-- Constructor
function XOWorld:_init(opts)
opts = opts or {}
+ super._init(self, opts)
-- Game mode (all circles, negative, or circles and crosses, negative and positive)
self.double = opts.double or false
@@ -102,24 +103,36 @@ function XOWorld:_init(opts)
{67, 7}, {67, 27}, {67, 47}, {67, 67}}
end
--- 1 state returned, of type 'int', of dimensionality 1 x self.size x self.size, between 0 and 1
-function XOWorld:getStateSpec()
- return {'int', {1, self.size, self.size}, {0, 1}}
+-- 1 state returned, of type 'int', of dimensionality 1 x self.size x self.size, between 0 and 1
+function XOWorld:getStateSpace()
+ local state = {}
+ state['name'] = 'Box'
+ state['shape'] = {1, self.size, self.size}
+ state['low'] = {
+ 0
+ }
+ state['high'] = {
+ 1
+ }
+ return state
end
-- 1 action required, of type 'int', of dimensionality 1, between 0 and 3
-function XOWorld:getActionSpec()
- return {'int', 1, {0, 3}}
+function XOWorld:getActionSpace()
+ local action = {}
+ action['name'] = 'Discrete'
+ action['n'] = 4
+ return action
end
-- RGB screen of size self.size x self.size
function XOWorld:getDisplaySpec()
- return {'real', {3, self.size, self.size}, {0, 1}}
+ return {'real', {3, self.size, self.size}, {0, 1}}
end
-- Min and max reward
-function XOWorld:getRewardSpec()
- return -10, 10
+function XOWorld:getRewardSpace()
+ return -10, 10
end
-- Redraws screen based on state and performs collision detection
@@ -188,7 +201,7 @@ function XOWorld:update()
end
-- Starts new game
-function XOWorld:start()
+function XOWorld:_start()
-- Reset time
self.time = 1
@@ -257,7 +270,7 @@ function XOWorld:start()
end
-- Steps in a game
-function XOWorld:step(action)
+function XOWorld:_step(action)
-- Move player
if action == 0 then
self.x = math.max(self.x - 1, 1)
diff --git a/rlenvs/init.lua b/rlenvs/init.lua
index c29cfc9..ca94a8a 100644
--- a/rlenvs/init.lua
+++ b/rlenvs/init.lua
@@ -1,7 +1,7 @@
local rlenvs = {}
-- Include environments
-rlenvs.Env = require 'rlenvs/Env'
+Env = require 'rlenvs/Env'
rlenvs.Acrobot = require 'rlenvs/Acrobot'
rlenvs.Atari = require 'rlenvs/Atari'
rlenvs.Blackjack = require 'rlenvs/Blackjack'
@@ -17,6 +17,7 @@ rlenvs.MultiArmedBandit = require 'rlenvs/MultiArmedBandit'
rlenvs.RandomWalk = require 'rlenvs/RandomWalk'
rlenvs.Taxi = require 'rlenvs/Taxi'
rlenvs.WindyWorld = require 'rlenvs/WindyWorld'
+rlenvs.XOWorld = require 'rlenvs/XOWorld'
-- Remove nil environments
for k, v in pairs(rlenvs) do
@@ -25,4 +26,10 @@ for k, v in pairs(rlenvs) do
end
end
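+-- Build an array of the available environment names, exposed as rlenvs.envs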
+local envs = {}
+for k, _ in pairs(rlenvs) do
+ envs[#envs + 1] = k
+end
+rlenvs.envs = envs
+
return rlenvs
diff --git a/tests/test.lua b/tests/test.lua
new file mode 100644
index 0000000..32a36f2
--- /dev/null
+++ b/tests/test.lua
@@ -0,0 +1,41 @@
+require 'torch'
+local rlenvs = require 'rlenvs'
+
+local test = torch.TestSuite()
+local tester
+
+function test.envs()
+ for index, env in ipairs(rlenvs.envs) do
+ local function runTest()
+ local Env = require('rlenvs.' .. env)
+ -- Initialise and start environment
+ local env = Env()
+ local actionSpace = env:getActionSpace()
+ local observation = env:start()
+ -- Pick random action and execute it
+ local action = torch.random(0, actionSpace['n'] - 1)
+ local reward, observation, terminal = env:step(action)
+ -- Display if implemented
+ env:render()
+ end
+
+ if env == 'Atari' then
+ local hasALEWrap = pcall(require, 'alewrap')
+ if hasALEWrap then
+ tester:assert(pcall(runTest), 'Failed to run rlenv environment ' .. env)
+ end
+ elseif env == 'Minecraft' then
+ local hasSocket = pcall(require, 'socket')
+ local hasLibMalmoLua = pcall(require, 'libMalmoLua')
+ if hasSocket and hasLibMalmoLua then
+ tester:assert(pcall(runTest), 'Failed to run rlenv environment ' .. env)
+ end
+ else
+ tester:assert(pcall(runTest), 'Failed to run rlenv environment ' .. env)
+ end
+ end
+end
+
+tester = torch.Tester()
+tester:add(test)
+tester:run()
\ No newline at end of file