From abeab01e0696989bee77e56b701e4e87fc4c8b83 Mon Sep 17 00:00:00 2001 From: JinraeKim Date: Fri, 7 Jan 2022 14:00:54 +0900 Subject: [PATCH 1/4] Modify update! (now it's evaluate_policy!) --- src/irl/linear_irl.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/irl/linear_irl.jl b/src/irl/linear_irl.jl index 3dde20c..0321676 100644 --- a/src/irl/linear_irl.jl +++ b/src/irl/linear_irl.jl @@ -7,6 +7,8 @@ See [1, "Online Implementation of IRL: A Hybrid Optimal Adaptive Controller"]. # Notes - T: Data stack period - N: The maximum length of stacked data +- ϕs_prev: the vector of bases (evaluated) +- V̂: the vector of approximate values (evaluated) """ mutable struct LinearIRL <: AbstractIRL Q::AbstractMatrix From f6c4ece0f531b1dd1914187a21e2ec2145039d2f Mon Sep 17 00:00:00 2001 From: JinraeKim Date: Thu, 30 Dec 2021 23:23:46 +0900 Subject: [PATCH 2/4] wip --- src/irl/linear_irl.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/irl/linear_irl.jl b/src/irl/linear_irl.jl index 0321676..3dde20c 100644 --- a/src/irl/linear_irl.jl +++ b/src/irl/linear_irl.jl @@ -7,8 +7,6 @@ See [1, "Online Implementation of IRL: A Hybrid Optimal Adaptive Controller"]. # Notes - T: Data stack period - N: The maximum length of stacked data -- ϕs_prev: the vector of bases (evaluated) -- V̂: the vector of approximate values (evaluated) """ mutable struct LinearIRL <: AbstractIRL Q::AbstractMatrix From 9dcf953b4dd3f0f698bd9590c2ead2ec888dea2c Mon Sep 17 00:00:00 2001 From: JinraeKim Date: Mon, 3 Jan 2022 23:30:08 +0900 Subject: [PATCH 3/4] wip --- src/algorithms/algorithms.jl | 2 ++ src/algorithms/ct_vi_adp/ct_vi_adp.jl | 20 ++++++++++++++++++++ src/{ => algorithms}/irl/irl.jl | 0 src/{ => algorithms}/irl/linear_irl.jl | 0 4 files changed, 22 insertions(+) create mode 100644 src/algorithms/algorithms.jl create mode 100644 src/algorithms/ct_vi_adp/ct_vi_adp.jl rename src/{ => algorithms}/irl/irl.jl (100%) rename src/{ => algorithms}/irl/linear_irl.jl (100%) diff --git a/src/algorithms/algorithms.jl b/src/algorithms/algorithms.jl new file mode 100644 index 0000000..3a0fb75 --- /dev/null +++ b/src/algorithms/algorithms.jl @@ -0,0 +1,2 @@ +include("irl/irl.jl") +include("ct_vi_adp/ct_vi_adp.jl") diff --git a/src/algorithms/ct_vi_adp/ct_vi_adp.jl b/src/algorithms/ct_vi_adp/ct_vi_adp.jl new file mode 100644 index 0000000..a560735 --- /dev/null +++ b/src/algorithms/ct_vi_adp/ct_vi_adp.jl @@ -0,0 +1,20 @@ +abstract type AbstractCTVIADP end + + +""" +[1, Section IV.B] +# Refs +[1] T. Bian and Z.-P. Jiang, “Reinforcement Learning and Adaptive Optimal Control for Continuous-Time Nonlinear Systems: A Value Iteration Approach,” IEEE Trans. Neural Netw. Learning Syst., pp. 1–10, 2021, doi: 10.1109/TNNLS.2020.3045087. +[2] T. Bian and Z.-P. Jiang, “Value Iteration, Adaptive Dynamic Programming, and Optimal Control of Nonlinear Systems,” in 2016 IEEE 55th Conference on Decision and Control (CDC), Las Vegas, NV, USA, Dec. 2016, pp. 3375–3380. doi: 10.1109/CDC.2016.7798777. +""" +struct CTVIADP <: AbstractCTVIADP + ϕs + ψs + K_ϕ + K_ψ +end + +function value_iteration!(alg::CTVIADP, w) + error("Complete this") + error("Add a new stop cond") +end diff --git a/src/irl/irl.jl b/src/algorithms/irl/irl.jl similarity index 100% rename from src/irl/irl.jl rename to src/algorithms/irl/irl.jl diff --git a/src/irl/linear_irl.jl b/src/algorithms/irl/linear_irl.jl similarity index 100% rename from src/irl/linear_irl.jl rename to src/algorithms/irl/linear_irl.jl From ebb72953fe1e3776363d62da0b4ef7fe03425323 Mon Sep 17 00:00:00 2001 From: JinraeKim Date: Fri, 7 Jan 2022 13:47:36 +0900 Subject: [PATCH 4/4] wip --- src/algorithms/ct_vi_adp/ct_vi_adp.jl | 39 +++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/src/algorithms/ct_vi_adp/ct_vi_adp.jl b/src/algorithms/ct_vi_adp/ct_vi_adp.jl index a560735..5a268dc 100644 --- a/src/algorithms/ct_vi_adp/ct_vi_adp.jl +++ b/src/algorithms/ct_vi_adp/ct_vi_adp.jl @@ -1,20 +1,49 @@ -abstract type AbstractCTVIADP end - - """ [1, Section IV.B] # Refs [1] T. Bian and Z.-P. Jiang, “Reinforcement Learning and Adaptive Optimal Control for Continuous-Time Nonlinear Systems: A Value Iteration Approach,” IEEE Trans. Neural Netw. Learning Syst., pp. 1–10, 2021, doi: 10.1109/TNNLS.2020.3045087. [2] T. Bian and Z.-P. Jiang, “Value Iteration, Adaptive Dynamic Programming, and Optimal Control of Nonlinear Systems,” in 2016 IEEE 55th Conference on Decision and Control (CDC), Las Vegas, NV, USA, Dec. 2016, pp. 3375–3380. doi: 10.1109/CDC.2016.7798777. """ -struct CTVIADP <: AbstractCTVIADP +struct CTVIADP <: AbstractEnv # from FlightSims ϕs ψs K_ϕ K_ψ + buffer::DataBuffer +end + +function State(env::CTVIADP) + @unpack ϕs = env + N_ϕ = length(ϕs) + return function (w=zeros(N_ϕ)) + w + end +end + +function Dynamics!(env::CTVIADP) + error("Complete this") + @unpack ϕs, K_ϕ, K_ψ, buffer = env + @unpack data_array = buffer + ts = data_array |> Map(datum -> datum.t) |> collect + xs = data_array |> Map(datum -> datum.x) |> collect + Φs = xs |> Map(x -> + (ϕs |> Map(ϕ -> ϕ(x)) |> collect) # Φ + ) |> collect + K_ϕ_inv = inv(K_ϕ) + K_ψ_inv = inv(K_ψ) + function dynamics!(dw, w, p, t) + error("Complete this") + # ĉ = K_ψ_inv * integrate(ts, blahblah) # Eq. (14) + # dw .= K_ϕ_inv * # Eq. (13) + end end -function value_iteration!(alg::CTVIADP, w) +function value_iteration!(env::CTVIADP, w) error("Complete this") error("Add a new stop cond") end + +function CTVIADP_Simulator(env::CTVIADP) + error("Complete this") + simulator = Simulator() +end