You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
The two functions
reward(::TicTacToeEnv,::Player)
is_terminated(::TicTacToeEnv)
result in a small but needless allocation due to a type instability in the call to get_tic_tac_toe_state_info()
To see this, you can use:
using ReinforcementLearning
using BenchmarkTools
env = TicTacToeEnv()
display(@benchmark reward($env))
display(@benchmark is_terminated($env))
I was able to fix this problem (and save about 7% of time) with 3 small changes to TicTacToeEnv.jl. There may be other ways to fix this, but these were the simplest changes I could find.
import ReinforcementLearningEnvironments: get_tic_tac_toe_state_info

"""
    get_tic_tac_toe_state_info()

Return the cached table mapping every reachable `TicTacToeEnv` board to a
`NamedTuple` `(index, is_terminated, winner)`, building the cache on first use.

The trailing type assertion on the returned global is the key fix for the type
instability: `RLEnvs.TIC_TAC_TOE_STATE_INFO` is a non-`const` global, so without
the annotation it infers as `Any` and every call site allocates.
"""
function ReinforcementLearningEnvironments.get_tic_tac_toe_state_info()
    if isempty(RLEnvs.TIC_TAC_TOE_STATE_INFO)
        @info "initializing tictactoe state info cache..."
        t = @elapsed begin
            n = 1
            root = TicTacToeEnv()
            RLEnvs.TIC_TAC_TOE_STATE_INFO[root] =
                (index=n, is_terminated=false, winner=nothing)
            walk(root) do env
                # CHANGE: qualify with RLEnvs. for consistency with every other
                # reference to the cache in this function.
                if !haskey(RLEnvs.TIC_TAC_TOE_STATE_INFO, env)
                    n += 1
                    # Channel 1 of the board marks empty cells; a position is
                    # non-terminal only if an empty square remains and no one has won.
                    has_empty_pos = any(view(env.board, :, :, 1))
                    w = if is_win(env, Player(:Cross))
                        Player(:Cross)
                    elseif is_win(env, Player(:Nought))
                        Player(:Nought)
                    else
                        nothing
                    end
                    RLEnvs.TIC_TAC_TOE_STATE_INFO[env] = (
                        index=n,
                        is_terminated=!(has_empty_pos && isnothing(w)),
                        winner=w,
                    )
                end
            end
        end
        @info "finished initializing tictactoe state info cache in $t seconds"
    end
    # CHANGE: declare the type explicitly so the return type is concretely inferred.
    RLEnvs.TIC_TAC_TOE_STATE_INFO::Dict{TicTacToeEnv,@NamedTuple{index::Int64, is_terminated::Bool, winner::Union{Nothing,Player}}}
end

import ReinforcementLearning: reward
"""
    RLBase.reward(env::TicTacToeEnv, player::Player)

Return `1` if `player` won the (terminated) game, `-1` if the opponent won,
and `0` for a draw or a non-terminal position.
"""
function RLBase.reward(env::TicTacToeEnv, player::Player)
    # CHANGE: only call get_tic_tac_toe_state_info() (which may build the whole
    # cache) when the cache is empty; otherwise index the typed global directly,
    # avoiding the allocation caused by the unstable call.
    info_env = if isempty(RLEnvs.TIC_TAC_TOE_STATE_INFO)
        get_tic_tac_toe_state_info()[env]
    else
        RLEnvs.TIC_TAC_TOE_STATE_INFO[env]
    end
    if info_env.is_terminated
        winner = info_env.winner
        if isnothing(winner)
            0
        elseif winner === player
            1
        else
            -1
        end
    else
        0
    end
end

import ReinforcementLearning: is_terminated
"""
    RLBase.is_terminated(env::TicTacToeEnv)

Return `true` when `env` is in a terminal position (a win or a draw).
"""
function RLBase.is_terminated(env::TicTacToeEnv)
    # CHANGE: only call get_tic_tac_toe_state_info() when the cache is empty.
    # Also dropped the pointless `info_env = ...` assignment that was
    # immediately returned and never used.
    if isempty(RLEnvs.TIC_TAC_TOE_STATE_INFO)
        return get_tic_tac_toe_state_info()[env].is_terminated
    else
        return RLEnvs.TIC_TAC_TOE_STATE_INFO[env].is_terminated
    end
end
The text was updated successfully, but these errors were encountered:
The two functions
reward(::TicTacToeEnv,::Player)
is_terminated(::TicTacToeEnv)
result in a small but needless allocation due to a type instability in the call to
get_tic_tac_toe_state_info()
To see this, you can use:
I was able to fix this problem (and save about 7% of time) with 3 small changes to
TicTacToeEnv.jl
. There may be other ways to fix this, but these were the simplest changes I could find.The text was updated successfully, but these errors were encountered: