From e8aae0f6b0890f363dbf84b04827cda53515ed2b Mon Sep 17 00:00:00 2001 From: MichaelQiYinChen <89437867+MichaelQiYinChen@users.noreply.github.com> Date: Mon, 10 Apr 2023 17:37:49 -0500 Subject: [PATCH 1/2] Fixed START, GOAL state --- chapter06/cliff_walking.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter06/cliff_walking.py b/chapter06/cliff_walking.py index 1bf99038..aef2bf94 100644 --- a/chapter06/cliff_walking.py +++ b/chapter06/cliff_walking.py @@ -35,8 +35,8 @@ ACTIONS = [ACTION_UP, ACTION_DOWN, ACTION_LEFT, ACTION_RIGHT] # initial state action pair values -START = [3, 0] -GOAL = [3, 11] +START = [WORLD_HEIGHT-1, 0] +GOAL = [WORLD_HEIGHT-1, WORLD_WIDTH-1] def step(state, action): i, j = state From 152bc0cd5cdc51efa1631e83b33e0505d29ffe00 Mon Sep 17 00:00:00 2001 From: MichaelQiYinChen <89437867+MichaelQiYinChen@users.noreply.github.com> Date: Tue, 11 Apr 2023 17:01:58 -0500 Subject: [PATCH 2/2] fixed cliffwalking STATE --- chapter06/cliff_walking.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter06/cliff_walking.py b/chapter06/cliff_walking.py index aef2bf94..ef28f5fe 100644 --- a/chapter06/cliff_walking.py +++ b/chapter06/cliff_walking.py @@ -52,7 +52,7 @@ def step(state, action): assert False reward = -1 - if (action == ACTION_DOWN and i == 2 and 1 <= j <= 10) or ( + if (action == ACTION_DOWN and i == WORLD_HEIGHT-2 and 1 <= j <= WORLD_WIDTH-2) or ( action == ACTION_RIGHT and state == START): reward = -100 next_state = START