diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0de2757..27d0a35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ FIND_PACKAGE(Torch REQUIRED)
 
 FILE(GLOB luasrc src/*.lua)
 
-ADD_TORCH_PACKAGE(rl "" "${luasrc}")
+ADD_TORCH_PACKAGE(twrl "" "${luasrc}")
 
-INSTALL(DIRECTORY "src/agent" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/rl")
-INSTALL(DIRECTORY "src/gym-http-api/binding-lua" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/rl")
\ No newline at end of file
+INSTALL(DIRECTORY "src/agent" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/twrl")
+INSTALL(DIRECTORY "src/gym-http-api/binding-lua" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/twrl")
\ No newline at end of file
diff --git a/examples/run.lua b/examples/run.lua
index 4dceb75..5847186 100644
--- a/examples/run.lua
+++ b/examples/run.lua
@@ -62,4 +62,4 @@
 local nSteps, nIterations = params.nSteps, params.nIterations
 params.outdir = logDir .. '/gym'
 -- run test
-local performance = require 'rl.experiment'(env, agent, nSteps, nIterations, params)
+local performance = require 'twrl.experiment'(env, agent, nSteps, nIterations, params)
diff --git a/src/agent/baseAgent.lua b/src/agent/baseAgent.lua
index 1218b87..2289199 100644
--- a/src/agent/baseAgent.lua
+++ b/src/agent/baseAgent.lua
@@ -15,7 +15,7 @@ local function getAgent(opt)
 opt.nHiddenLayerSize = opt.nHiddenLayerSize or 10
 if opt.model then
 local modelName = opt.model
- model = require('rl.agent.model.' .. opt.model)({
+ model = require('twrl.agent.model.' .. opt.model)({
 nInputs = envDetails.nbStates,
 nOutputs = envDetails.nbActions,
 nHiddenLayerSize = opt.nHiddenLayerSize,
@@ -27,7 +27,7 @@
 })
 end
 
- policy = require('rl.agent.policy')[opt.policy]({
+ policy = require('twrl.agent.policy')[opt.policy]({
 client = opt.client,
 instanceID = instanceID,
 envDetails = envDetails,
@@ -41,7 +41,7 @@
 randomActionSampler = randomActionSampler
 })
 
- local learn = require('rl.agent.learningUpdate.' .. opt.learningUpdate)({
+ local learn = require('twrl.agent.learningUpdate.' .. opt.learningUpdate)({
 model = model,
 envDetails = envDetails,
 tdLearnUpdate = opt.tdLearnUpdate,
diff --git a/src/agent/init.lua b/src/agent/init.lua
index f1041f1..e9b8494 100644
--- a/src/agent/init.lua
+++ b/src/agent/init.lua
@@ -1,8 +1,8 @@
 local agent = {
- baseAgent = require 'rl.agent.baseAgent',
- learningUpdate = require 'rl.agent.learningUpdate',
- model = require 'rl.agent.model',
- policy = require 'rl.agent.policy'
+ baseAgent = require 'twrl.agent.baseAgent',
+ learningUpdate = require 'twrl.agent.learningUpdate',
+ model = require 'twrl.agent.model',
+ policy = require 'twrl.agent.policy'
 }
 
 return agent
\ No newline at end of file
diff --git a/src/agent/learningUpdate/init.lua b/src/agent/learningUpdate/init.lua
index cc091ff..2655e41 100644
--- a/src/agent/learningUpdate/init.lua
+++ b/src/agent/learningUpdate/init.lua
@@ -1,8 +1,8 @@
 local learningUpdate = {
- noLearning = require 'rl.agent.learningUpdate.noLearning',
- reinforce = require 'rl.agent.learningUpdate.reinforce',
- tdLambda = require 'rl.agent.learningUpdate.tdLambda',
- util = require 'rl.agent.learningUpdate.util'
+ noLearning = require 'twrl.agent.learningUpdate.noLearning',
+ reinforce = require 'twrl.agent.learningUpdate.reinforce',
+ tdLambda = require 'twrl.agent.learningUpdate.tdLambda',
+ util = require 'twrl.agent.learningUpdate.util'
 }
 
 return learningUpdate
\ No newline at end of file
diff --git a/src/agent/learningUpdate/reinforce.lua b/src/agent/learningUpdate/reinforce.lua
index f63576f..9c6fed4 100644
--- a/src/agent/learningUpdate/reinforce.lua
+++ b/src/agent/learningUpdate/reinforce.lua
@@ -9,7 +9,7 @@ local function getLearningUpdate(opt)
 local gradClip = opt.gradClip
 local optim = require 'optim'
 local mo = require 'moses'
- local util = require 'rl.agent.learningUpdate.util'
+ local util = require 'twrl.agent.learningUpdate.util'
 local smallEps = util.smallEps
 local model = opt.model
 local net = model.net
diff --git a/src/agent/model/init.lua b/src/agent/model/init.lua
index 376ea33..6e09605 100644
--- a/src/agent/model/init.lua
+++ b/src/agent/model/init.lua
@@ -1,8 +1,8 @@
 local model = {
- mlp = require 'rl.agent.model.mlp',
- noModel = require 'rl.agent.model.noModel',
- qFunction = require 'rl.agent.model.qFunction',
- tilecoding = require 'rl.agent.model.tilecoding'
+ mlp = require 'twrl.agent.model.mlp',
+ noModel = require 'twrl.agent.model.noModel',
+ qFunction = require 'twrl.agent.model.qFunction',
+ tilecoding = require 'twrl.agent.model.tilecoding'
 }
 
 return model
\ No newline at end of file
diff --git a/src/agent/model/qFunction.lua b/src/agent/model/qFunction.lua
index f088e8d..f5523da 100644
--- a/src/agent/model/qFunction.lua
+++ b/src/agent/model/qFunction.lua
@@ -24,7 +24,7 @@ local function getModel(opt)
 local stateScalingFactor, stateMins = getStateMinsAndScaling(envDetails, numTilings)
 local memorySize = numTiles * numTiles
 
- local tc = require 'rl.agent.model.tilecoding'({numTilings = numTilings, memorySize = memorySize, scaleFactor = stateScalingFactor, stateMins = stateMins})
+ local tc = require 'twrl.agent.model.tilecoding'({numTilings = numTilings, memorySize = memorySize, scaleFactor = stateScalingFactor, stateMins = stateMins})
 
 local weights = torch.FloatTensor(numTilings * memorySize * nbActions):zero():fill(initialWeightVal)
 local eligibility = torch.FloatTensor(numTilings * memorySize * nbActions):zero():fill(0)
diff --git a/src/agent/policy/init.lua b/src/agent/policy/init.lua
index db581a3..c51b6c0 100644
--- a/src/agent/policy/init.lua
+++ b/src/agent/policy/init.lua
@@ -1,14 +1,14 @@
-local actionSamplers = require 'rl.agent.policy.actionSamplers'
+local actionSamplers = require 'twrl.agent.policy.actionSamplers'
 
 local policy = {
- egreedy = require 'rl.agent.policy.egreedy',
- normal = require 'rl.agent.policy.stochasticModelPolicy'({
+ egreedy = require 'twrl.agent.policy.egreedy',
+ normal = require 'twrl.agent.policy.stochasticModelPolicy'({
 actionSampler = actionSamplers.normal
 }),
- categorical = require 'rl.agent.policy.stochasticModelPolicy'({
+ categorical = require 'twrl.agent.policy.stochasticModelPolicy'({
 actionSampler = actionSamplers.categorical
 }),
- random = require 'rl.agent.policy.random',
+ random = require 'twrl.agent.policy.random',
 }
 
 return policy
\ No newline at end of file
diff --git a/src/experiment.lua b/src/experiment.lua
index 8cab4af..3f5de56 100644
--- a/src/experiment.lua
+++ b/src/experiment.lua
@@ -1,6 +1,6 @@
 local function experiment(envName, agent, nSteps, nIterations, opt)
- local util = require 'rl.util'()
- local gymClient = require 'rl.binding-lua.gym_http_client'
+ local util = require 'twrl.util'()
+ local gymClient = require 'twrl.binding-lua.gym_http_client'
 local opt = opt or {}
 local base = 'http://127.0.0.1:5000'
 local client = gymClient.new(base)
@@ -11,7 +11,7 @@ local function experiment(envName, agent, nSteps, nIterations, opt)
 local resume = opt.resume
 local renderAllSteps = opt.renderAllSteps
 local render = renderAllSteps == 'true' and true or false
- local perf = require 'rl.perf'({nIterations = nSteps, windowSize = opt.windowSize})
+ local perf = require 'twrl.perf'({nIterations = nSteps, windowSize = opt.windowSize})
 
 local function run()
 client:env_monitor_start(instanceID, outdir, force, resume, video)
@@ -24,7 +24,7 @@
 agentOpt.learningUpdate = agent.learningUpdate
 agentOpt.envDetails = util.getStateAndActionSpecs(agentOpt.stateSpace, agentOpt.actionSpace)
 function agentOpt.randomActionSampler() return client:env_action_space_sample(instanceID) end
- local agent = require 'rl.agent.baseAgent'(agentOpt)
+ local agent = require 'twrl.agent.baseAgent'(agentOpt)
 
 local iterPerformance = {}
 for nIter = 1, nIterations do
@@ -50,7 +50,7 @@
 
 client:env_monitor_close(instanceID)
 if opt.uploadResults == 'true' then
- print('Uploading results, check server for URL.')
+ print('Uploading results, check server for URL')
 -- Upload to the scoreboard, OPENAI_GYM_API_KEY must be set
 client:upload(outdir)
 end
diff --git a/src/init.lua b/src/init.lua
index 098d029..8104576 100644
--- a/src/init.lua
+++ b/src/init.lua
@@ -1,15 +1,15 @@
-local rl = {}
+local twrl = {}
 
 -- Meta info
-rl.VERSION = '0.1'
-rl.LICENSE = 'MIT'
+twrl.VERSION = '0.1'
+twrl.LICENSE = 'MIT'
 
 -- Utility packages
-rl.agent = require 'rl.agent'
-rl.util = require 'rl.util'
-rl.perf = require 'rl.perf'
-rl.experiment = require 'rl.experiment'
-rl.gymClient = require 'rl.binding-lua.gym_http_client'
+twrl.agent = require 'twrl.agent'
+twrl.util = require 'twrl.util'
+twrl.perf = require 'twrl.perf'
+twrl.experiment = require 'twrl.experiment'
+twrl.gymClient = require 'twrl.binding-lua.gym_http_client'
 
 -- Return package
-return rl
\ No newline at end of file
+return twrl
\ No newline at end of file
diff --git a/test/test-gym.lua b/test/test-gym.lua
index 1a88110..43a0c1b 100644
--- a/test/test-gym.lua
+++ b/test/test-gym.lua
@@ -46,7 +46,7 @@ function mujoco.testMujoco()
 end
 
 function experiment.badExperimentCall()
- local performance = require 'rl.experiment'()
+ local performance = require 'twrl.experiment'()
 tester:eq(performance, {}, "bad experiment call should fail with bad settings ")
 end
 
@@ -60,7 +60,7 @@ function experiment.randomNoLearningNoModel()
 }
 local nSteps = 2
 local nIterations = 2
- local performance = require 'rl.experiment'(env, agent, nSteps, nIterations, params)
+ local performance = require 'twrl.experiment'(env, agent, nSteps, nIterations, params)
 tester:eq(performance.iteration, 2, "basic experiment should run")
 end
 
diff --git a/test/test.lua b/test/test.lua
index 6f8d237..a277ac8 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -6,7 +6,7 @@
 local tilecoding = torch.TestSuite()
 local tester = torch.Tester()
 -- Load all
-local util = require 'rl.util'()
+local util = require 'twrl.util'()
 
 function base.torchTensor()
 local a = {2, torch.Tensor{1, 2, 2}}
@@ -19,7 +19,7 @@ function tilecoding.tilecodeConsistent()
 local numTiles = 32
 local memorySize = numTiles * numTiles
 local stateScalingFactor = {1, 1}
- local tc = require 'rl.agent.model.tilecoding'(({
+ local tc = require 'twrl.agent.model.tilecoding'(({
 numTilings = numTilings,
 memorySize = memorySize,
 scaleFactor = stateScalingFactor
@@ -35,7 +35,7 @@ function tilecoding.tilecodePredictable()
 local numTiles = 32
 local memorySize = numTiles * numTiles
 local stateScalingFactor = {1, 1}
- local tc = require 'rl.agent.model.tilecoding'(({
+ local tc = require 'twrl.agent.model.tilecoding'(({
 numTilings = numTilings,
 memorySize = memorySize,
 scaleFactor = stateScalingFactor
@@ -47,25 +47,25 @@ function tilecoding.tilecodePredictable()
 end
 
 function performance.reset()
- local perf = require 'rl.perf'()
+ local perf = require 'twrl.perf'()
 local emptyTable = perf.reset()
 tester:eq(emptyTable, {}, "performance: reset failed")
 end
 
 function performance.addRewardTerminal()
- local perf = require 'rl.perf'()
+ local perf = require 'twrl.perf'()
 local traj, trajs = perf.addReward(1, 1, true)
 tester:eq(traj, {}, "performance: add reward terminal failed")
 end
 
 function performance.addRewardNonTerminal()
- local perf = require 'rl.perf'()
+ local perf = require 'twrl.perf'()
 local traj, trajs = perf.addReward(1, 1, false)
 tester:eq(traj, {1}, "performance: add reward non-terminal failed")
 end
 
 function performance.getSummary()
- local perf = require 'rl.perf'({windowSize = 10})
+ local perf = require 'twrl.perf'({windowSize = 10})
 local _, _ = perf.addReward(1, 0, false)
 local _, _ = perf.addReward(1, 0, false)
 local _, _ = perf.addReward(1, 1, true)
diff --git a/rl-scm-1.rockspec b/twrl-scm-1.rockspec
similarity index 77%
rename from rl-scm-1.rockspec
rename to twrl-scm-1.rockspec
index b22849f..637e25f 100644
--- a/rl-scm-1.rockspec
+++ b/twrl-scm-1.rockspec
@@ -1,15 +1,15 @@
-package = "rl"
+package = "twrl"
 version = "scm-1"
 source = {
- url = "https://github.com/twitter/torch-rl.git"
+ url = "https://github.com/twitter/torch-twrl.git"
 }
 description = {
 summary = "Reinforcement Learning for Torch and Lua",
 detailed = [[
- torch-rl is a Reinforcement Learning framework
+ torch-twrl is a Reinforcement Learning framework
 built for Torch. It interfaces with OpenAI Gym.
 ]],
- homepage = "https://github.com/twitter/torch-rl",
+ homepage = "https://github.com/twitter/torch-twrl",
 license = "MIT"
 }
 dependencies = {
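
Usage note (reviewer sketch, not part of the diff): after this rename every Lua require path moves from 'rl.*' to 'twrl.*', so downstream code needs the new prefix once the rock is rebuilt and reinstalled. A minimal check of the renamed package is sketched below; the experiment call is left commented out because it needs a running gym-http-api server and caller-supplied arguments.

-- Minimal sketch, assuming the twrl rock is installed.
local twrl = require 'twrl'        -- was: require 'rl'
print(twrl.VERSION, twrl.LICENSE)  -- '0.1' and 'MIT', per the updated src/init.lua

-- Experiments keep the same call shape, only the module prefix changes
-- (see examples/run.lua and test/test-gym.lua above); env, agent, nSteps,
-- nIterations and params are the caller's own values:
-- local performance = require 'twrl.experiment'(env, agent, nSteps, nIterations, params)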