{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":698566972,"defaultBranch":"main","name":"unRL","ownerLogin":"rafonsor","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-09-30T09:51:00.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/6885870?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1696067461.0","currentOid":""},"activityList":{"items":[{"before":"cbad6b1bd9b3dd7728a4a4f6eaab62f553b3b41d","after":"d7765bcbe2ac2ff056640abd38b6b2e770b13d09","ref":"refs/heads/main","pushedAt":"2023-11-24T17:17:31.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: detach tensors in policy_gradient.py","shortMessageHtmlLink":"improve: detach tensors in policy_gradient.py"}},{"before":"dfdbaa099dfc750e754f873d7305ea595957b29f","after":"cbad6b1bd9b3dd7728a4a4f6eaab62f553b3b41d","ref":"refs/heads/main","pushedAt":"2023-11-22T16:31:15.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: offline and batch NAF","shortMessageHtmlLink":"improve: offline and batch NAF"}},{"before":"1cbe7ed47677d8bb86b1031864b0b88b5dd51132","after":"dfdbaa099dfc750e754f873d7305ea595957b29f","ref":"refs/heads/main","pushedAt":"2023-11-22T09:08:22.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: online NAF","shortMessageHtmlLink":"feat: online NAF"}},{"before":"808cad6e9c7c2442eac201a6a6adda21821ffef2","after":"1cbe7ed47677d8bb86b1031864b0b88b5dd51132","ref":"refs/heads/main","pushedAt":"2023-11-21T23:55:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: Agent classes for optimal acting","shortMessageHtmlLink":"feat: Agent classes for optimal acting"}},{"before":"ccad3b3f03985b2e4873faac3f9a9600a070898f","after":"808cad6e9c7c2442eac201a6a6adda21821ffef2","ref":"refs/heads/main","pushedAt":"2023-11-17T19:25:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"meta: reorganise model-free algos\nimprove: add ACER to module exports, update rho docstrings","shortMessageHtmlLink":"meta: reorganise model-free algos"}},{"before":"9810bc33fd2f08728192540a345436705a8edf0b","after":"ccad3b3f03985b2e4873faac3f9a9600a070898f","ref":"refs/heads/main","pushedAt":"2023-11-17T19:12:15.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: ACER for continuous Action spaces","shortMessageHtmlLink":"feat: ACER for continuous Action spaces"}},{"before":"72ee1fafcad316252f337e7e2d094e04cc6bf341","after":"9810bc33fd2f08728192540a345436705a8edf0b","ref":"refs/heads/main","pushedAt":"2023-11-16T17:47:12.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: ACER for discrete Action spaces\nfeat: Experience buffer for Trajectories\nimprove: refactor SARS transition container","shortMessageHtmlLink":"feat: ACER for discrete Action spaces"}},{"before":"4a37b155ed057a7a0ecc566edd8060a083ba8335","after":"72ee1fafcad316252f337e7e2d094e04cc6bf341","ref":"refs/heads/main","pushedAt":"2023-11-14T09:26:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: DuelingDQN","shortMessageHtmlLink":"feat: DuelingDQN"}},{"before":"e734fe21abef3b5831361ed834039cc586dd0082","after":"4a37b155ed057a7a0ecc566edd8060a083ba8335","ref":"refs/heads/main","pushedAt":"2023-11-12T16:03:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: ACKTR","shortMessageHtmlLink":"feat: ACKTR"}},{"before":"1df73023a12fcf18700b92f427598f57c12a6ed0","after":"e734fe21abef3b5831361ed834039cc586dd0082","ref":"refs/heads/main","pushedAt":"2023-11-11T18:14:38.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: K-FAC Optimizer","shortMessageHtmlLink":"feat: K-FAC Optimizer"}},{"before":"d4db74c374b1977afae076578bb903f9962981a5","after":"1df73023a12fcf18700b92f427598f57c12a6ed0","ref":"refs/heads/main","pushedAt":"2023-11-09T17:38:47.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"meta: update plan progress","shortMessageHtmlLink":"meta: update plan progress"}},{"before":"ac18559fa19f05caeed0c92a0299513ed0adc55c","after":"d4db74c374b1977afae076578bb903f9962981a5","ref":"refs/heads/main","pushedAt":"2023-11-05T01:46:43.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"meta: plan - add model-based algos","shortMessageHtmlLink":"meta: plan - add model-based algos"}},{"before":"3734e1337ffe715b773f1e55ea7f9cf568fc28b3","after":"ac18559fa19f05caeed0c92a0299513ed0adc55c","ref":"refs/heads/main","pushedAt":"2023-11-05T01:03:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"meta: plan","shortMessageHtmlLink":"meta: plan"}},{"before":"8c45ce64593bcdd3e1f7a12d7fbde992ce2148d7","after":"3734e1337ffe715b773f1e55ea7f9cf568fc28b3","ref":"refs/heads/main","pushedAt":"2023-11-04T22:46:49.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"meta: align methods, module exports","shortMessageHtmlLink":"meta: align methods, module exports"}},{"before":"f308062acabffdd6431f7c638f6107bffe3d9fdd","after":"8c45ce64593bcdd3e1f7a12d7fbde992ce2148d7","ref":"refs/heads/main","pushedAt":"2023-11-04T19:30:48.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: modern Soft Actor-Critic (QSAC) without State-value function","shortMessageHtmlLink":"feat: modern Soft Actor-Critic (QSAC) without State-value function"}},{"before":"55418438b0390492032710bf5bb7f0517616f8e7","after":"f308062acabffdd6431f7c638f6107bffe3d9fdd","ref":"refs/heads/main","pushedAt":"2023-11-04T12:29:23.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: refactor policy and value function objects","shortMessageHtmlLink":"improve: refactor policy and value function objects"}},{"before":"7adaffa4efcdecab654f7a9d9cc75a6f43689e1d","after":"55418438b0390492032710bf5bb7f0517616f8e7","ref":"refs/heads/main","pushedAt":"2023-11-03T18:40:58.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: TwinDelayedDDPG","shortMessageHtmlLink":"feat: TwinDelayedDDPG"}},{"before":"e5775f52acbc22729a90a3b0fc750f2b4066c0d3","after":"7adaffa4efcdecab654f7a9d9cc75a6f43689e1d","ref":"refs/heads/main","pushedAt":"2023-11-02T23:51:30.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: TRPO with support for adaptive penalty","shortMessageHtmlLink":"feat: TRPO with support for adaptive penalty"}},{"before":"7c16f9ed3901941991046f8bf27a2278ee9e8c44","after":"e5775f52acbc22729a90a3b0fc750f2b4066c0d3","ref":"refs/heads/main","pushedAt":"2023-11-02T23:50:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: simplified PPO","shortMessageHtmlLink":"feat: simplified PPO"}},{"before":"590f8a940b1075bd6ca7db1f3e9821ec3d48a985","after":"7c16f9ed3901941991046f8bf27a2278ee9e8c44","ref":"refs/heads/main","pushedAt":"2023-11-02T20:48:16.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: full PPO","shortMessageHtmlLink":"feat: full PPO"}},{"before":"ba4877c409cccb95b3fe36860461b8379a95ddb9","after":"590f8a940b1075bd6ca7db1f3e9821ec3d48a985","ref":"refs/heads/main","pushedAt":"2023-11-02T09:14:05.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: simplified PPO","shortMessageHtmlLink":"feat: simplified PPO"}},{"before":"08f0718c965cdec66d910c3adf9ceda6114e8219","after":"ba4877c409cccb95b3fe36860461b8379a95ddb9","ref":"refs/heads/main","pushedAt":"2023-11-01T01:13:09.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: Entropy regularisation for A2C","shortMessageHtmlLink":"feat: Entropy regularisation for A2C"}},{"before":"07126d04359d29eba17b3f67609ff60a969dcb95","after":"08f0718c965cdec66d910c3adf9ceda6114e8219","ref":"refs/heads/main","pushedAt":"2023-11-01T00:52:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: A2C","shortMessageHtmlLink":"feat: A2C"}},{"before":"5a7c3834b26e692cdd6302f4ce1710c75f977c9e","after":"07126d04359d29eba17b3f67609ff60a969dcb95","ref":"refs/heads/main","pushedAt":"2023-10-31T23:11:40.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: CliffWalking gym env for online AC, change blackjack for offline REINFORCE","shortMessageHtmlLink":"feat: CliffWalking gym env for online AC, change blackjack for offlin…"}},{"before":"3d930936c5f66e8d4db7c730016dddad5ab6eb94","after":"5a7c3834b26e692cdd6302f4ce1710c75f977c9e","ref":"refs/heads/main","pushedAt":"2023-10-29T22:26:48.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: DQN tweaks, force deletion/gc","shortMessageHtmlLink":"improve: DQN tweaks, force deletion/gc"}},{"before":"3622cdaf8b145248bc2aa316c85b721b7d7daeaa","after":"3d930936c5f66e8d4db7c730016dddad5ab6eb94","ref":"refs/heads/main","pushedAt":"2023-10-29T16:11:27.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: refactor DQN algos, bring full DQN with Prioritised Experience Replay","shortMessageHtmlLink":"improve: refactor DQN algos, bring full DQN with Prioritised Experien…"}},{"before":"a0362710184a9b744929c480463209c8cb411b35","after":"3622cdaf8b145248bc2aa316c85b721b7d7daeaa","ref":"refs/heads/main","pushedAt":"2023-10-29T01:07:09.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: DQN with Prioritised Experience Replay\nimprove: refactored DQN variants to facility composability","shortMessageHtmlLink":"feat: DQN with Prioritised Experience Replay"}},{"before":"f7a4f4012a89e14d2077417b2305d58ac54f2877","after":"a0362710184a9b744929c480463209c8cb411b35","ref":"refs/heads/main","pushedAt":"2023-10-27T21:01:15.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"improve: modularise game model factories","shortMessageHtmlLink":"improve: modularise game model factories"}},{"before":"46e0daf1a4e4d8f913e3b3af79bd798af18045cf","after":"f7a4f4012a89e14d2077417b2305d58ac54f2877","ref":"refs/heads/main","pushedAt":"2023-10-27T18:03:55.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"bug: add second flag for explicit episode conclusion instead of assuming all conclusions end up in a terminal state.","shortMessageHtmlLink":"bug: add second flag for explicit episode conclusion instead of assum…"}},{"before":"73fd944ae8bdae6489a33e757adf8e5a0bb49a31","after":"46e0daf1a4e4d8f913e3b3af79bd798af18045cf","ref":"refs/heads/main","pushedAt":"2023-10-27T17:08:12.000Z","pushType":"push","commitsCount":5,"pusher":{"login":"rafonsor","name":"Rafael","path":"/rafonsor","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/6885870?s=80&v=4"},"commit":{"message":"feat: Deep Q-Network with Experience Replay","shortMessageHtmlLink":"feat: Deep Q-Network with Experience Replay"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAADtxBprwA","startCursor":null,"endCursor":null}},"title":"Activity · rafonsor/unRL"}