We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
See JuliaReinforcementLearning/ReinforcementLearning.jl#980
To reproduce, check out the EpisodeSampleRatioController branch in my fork and run the JuliaRL_NFQ_CartPole experiment
JuliaRL_NFQ_CartPole
Stacktrace
ERROR: type NamedTuple has no field terminal Stacktrace: [1] getproperty @ ./Base.jl:37 [inlined] [2] on_insert! @ ~/.julia/dev/ReinforcementLearningTrajectories/src/controllers.jl:78 [inlined] [3] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, 
SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, n::Int64, x::NamedTuple{(:state,), Tuple{Vector{Float32}}}) @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:114 [4] on_insert!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, 
CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, 
CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}}) @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:113 [5] push!(t::Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, 
Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}, x::NamedTuple{(:state,), Tuple{Vector{Float32}}}) @ ReinforcementLearningTrajectories ~/.julia/dev/ReinforcementLearningTrajectories/src/trajectory.jl:105 [6] push!(agent::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, 
:reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, 
CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, #unused#::PreEpisodeStage, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}) @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/policies/agent/agent_base.jl:44 [7] macro expansion @ ~/.julia/packages/TimerOutputs/RsWnF/src/TimerOutput.jl:253 [inlined] [8] _run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, 
Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, 
Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal) @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:92 [9] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, 
Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, 
DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}, reset_condition::ResetAtTerminal) @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:75 [10] run(policy::Agent{QBasedPolicy{NFQ{Approximator{Flux.Chain{Tuple{Flux.Dense{typeof(NNlib.σ), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(NNlib.relu), Matrix{Float32}, Vector{Float32}}, Flux.Dense{typeof(identity), Matrix{Float32}, Vector{Float32}}}}, Flux.Optimise.RMSProp}, Random.TaskLocalRNG, typeof(Flux.Losses.mse)}, EpsilonGreedyExplorer{:exp, false, StableRNGs.LehmerRNG}}, Trajectory{EpisodesBuffer{(:state, :next_state, :action, :reward, :terminal), Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, CircularArraySARTSTraces{Tuple{MultiplexTraces{(:state, :next_state), Trace{CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, 
Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}, 5, Tuple{SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, SubArray{Float32, 1, CircularArrayBuffers.CircularArrayBuffer{Float32, 2, Matrix{Float32}}, Tuple{Base.Slice{Base.OneTo{Int64}}, Int64}, true}, Trace{CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, SubArray{Int64, 0, CircularArrayBuffers.CircularVectorBuffer{Int64, Vector{Int64}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, SubArray{Float32, 0, CircularArrayBuffers.CircularVectorBuffer{Float32, Vector{Float32}}, Tuple{Int64}, true}}, Trace{CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, SubArray{Bool, 0, CircularArrayBuffers.CircularVectorBuffer{Bool, Vector{Bool}}, Tuple{Int64}, true}}}}, DataStructures.CircularBuffer{Int64}, DataStructures.CircularBuffer{Bool}}, BatchSampler{(:state, :next_state, :action, :reward, :terminal)}, EpisodeSampleRatioController, typeof(identity)}}, env::ReinforcementLearningEnvironments.CartPoleEnv{Float32, Int64}, stop_condition::StopAfterStep{ProgressMeter.Progress}, hook::TotalRewardPerEpisode{Val{true}, Float64}) @ ReinforcementLearningCore 
~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:74 [11] run(ex::Experiment{:JuliaRL_NFQ_CartPole}) @ ReinforcementLearningCore ~/.julia/dev/ReinforcementLearning/src/ReinforcementLearningCore/src/core/run.jl:63 [12] top-level scope @ REPL[5]:1 [13] top-level scope @ ~/.julia/packages/CUDA/35NC6/src/initialization.jl:190
The text was updated successfully, but these errors were encountered:
@HenriDeh
Sorry, something went wrong.
I transferred this to RLTraj. It is somewhat of a duplicate of #52.
No branches or pull requests
See JuliaReinforcementLearning/ReinforcementLearning.jl#980
To reproduce, check out the EpisodeSampleRatioController branch in my fork and run the JuliaRL_NFQ_CartPole experiment.
Stacktrace
The text was updated successfully, but these errors were encountered: