From abeab01e0696989bee77e56b701e4e87fc4c8b83 Mon Sep 17 00:00:00 2001
From: JinraeKim <kjl950403@gmail.com>
Date: Fri, 7 Jan 2022 14:00:54 +0900
Subject: [PATCH 1/4] Modify update! (now it's evaluate_policy!)

---
 src/irl/linear_irl.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/irl/linear_irl.jl b/src/irl/linear_irl.jl
index 3dde20c..0321676 100644
--- a/src/irl/linear_irl.jl
+++ b/src/irl/linear_irl.jl
@@ -7,6 +7,8 @@ See [1, "Online Implementation of IRL: A Hybrid Optimal Adaptive Controller"].
 # Notes
 - T: Data stack period
 - N: The maximum length of stacked data
+- ϕs_prev: the vector of bases (evaluated)
+- V̂: the vector of approximate values (evaluated)
 """
 mutable struct LinearIRL <: AbstractIRL
     Q::AbstractMatrix

From f6c4ece0f531b1dd1914187a21e2ec2145039d2f Mon Sep 17 00:00:00 2001
From: JinraeKim <kjl950403@gmail.com>
Date: Thu, 30 Dec 2021 23:23:46 +0900
Subject: [PATCH 2/4] Remove evaluated-basis/value notes from LinearIRL docs

---
 src/irl/linear_irl.jl | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/irl/linear_irl.jl b/src/irl/linear_irl.jl
index 0321676..3dde20c 100644
--- a/src/irl/linear_irl.jl
+++ b/src/irl/linear_irl.jl
@@ -7,8 +7,6 @@ See [1, "Online Implementation of IRL: A Hybrid Optimal Adaptive Controller"].
 # Notes
 - T: Data stack period
 - N: The maximum length of stacked data
-- ϕs_prev: the vector of bases (evaluated)
-- V̂: the vector of approximate values (evaluated)
 """
 mutable struct LinearIRL <: AbstractIRL
     Q::AbstractMatrix

From 9dcf953b4dd3f0f698bd9590c2ead2ec888dea2c Mon Sep 17 00:00:00 2001
From: JinraeKim <kjl950403@gmail.com>
Date: Mon, 3 Jan 2022 23:30:08 +0900
Subject: [PATCH 3/4] Move IRL into src/algorithms; add CTVIADP skeleton

---
 src/algorithms/algorithms.jl           |  2 ++
 src/algorithms/ct_vi_adp/ct_vi_adp.jl  | 20 ++++++++++++++++++++
 src/{ => algorithms}/irl/irl.jl        |  0
 src/{ => algorithms}/irl/linear_irl.jl |  0
 4 files changed, 22 insertions(+)
 create mode 100644 src/algorithms/algorithms.jl
 create mode 100644 src/algorithms/ct_vi_adp/ct_vi_adp.jl
 rename src/{ => algorithms}/irl/irl.jl (100%)
 rename src/{ => algorithms}/irl/linear_irl.jl (100%)

diff --git a/src/algorithms/algorithms.jl b/src/algorithms/algorithms.jl
new file mode 100644
index 0000000..3a0fb75
--- /dev/null
+++ b/src/algorithms/algorithms.jl
@@ -0,0 +1,2 @@
+include("irl/irl.jl")
+include("ct_vi_adp/ct_vi_adp.jl")
diff --git a/src/algorithms/ct_vi_adp/ct_vi_adp.jl b/src/algorithms/ct_vi_adp/ct_vi_adp.jl
new file mode 100644
index 0000000..a560735
--- /dev/null
+++ b/src/algorithms/ct_vi_adp/ct_vi_adp.jl
@@ -0,0 +1,20 @@
+abstract type AbstractCTVIADP end
+
+
+"""
+[1, Section IV.B]
+# Refs
+[1] T. Bian and Z.-P. Jiang, “Reinforcement Learning and Adaptive Optimal Control for Continuous-Time Nonlinear Systems: A Value Iteration Approach,” IEEE Trans. Neural Netw. Learning Syst., pp. 1–10, 2021, doi: 10.1109/TNNLS.2020.3045087.
+[2] T. Bian and Z.-P. Jiang, “Value Iteration, Adaptive Dynamic Programming, and Optimal Control of Nonlinear Systems,” in 2016 IEEE 55th Conference on Decision and Control (CDC), Las Vegas, NV, USA, Dec. 2016, pp. 3375–3380. doi: 10.1109/CDC.2016.7798777.
+"""
+struct CTVIADP <: AbstractCTVIADP
+    ϕs
+    ψs
+    K_ϕ
+    K_ψ
+end
+
+function value_iteration!(alg::CTVIADP, w)
+    error("Complete this")
+    error("Add a new stop cond")
+end
diff --git a/src/irl/irl.jl b/src/algorithms/irl/irl.jl
similarity index 100%
rename from src/irl/irl.jl
rename to src/algorithms/irl/irl.jl
diff --git a/src/irl/linear_irl.jl b/src/algorithms/irl/linear_irl.jl
similarity index 100%
rename from src/irl/linear_irl.jl
rename to src/algorithms/irl/linear_irl.jl

From ebb72953fe1e3776363d62da0b4ef7fe03425323 Mon Sep 17 00:00:00 2001
From: JinraeKim <kjl950403@gmail.com>
Date: Fri, 7 Jan 2022 13:47:36 +0900
Subject: [PATCH 4/4] Make CTVIADP an AbstractEnv; stub State and Dynamics!

---
 src/algorithms/ct_vi_adp/ct_vi_adp.jl | 39 +++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/src/algorithms/ct_vi_adp/ct_vi_adp.jl b/src/algorithms/ct_vi_adp/ct_vi_adp.jl
index a560735..5a268dc 100644
--- a/src/algorithms/ct_vi_adp/ct_vi_adp.jl
+++ b/src/algorithms/ct_vi_adp/ct_vi_adp.jl
@@ -1,20 +1,49 @@
-abstract type AbstractCTVIADP end
-
-
 """
 [1, Section IV.B]
 # Refs
 [1] T. Bian and Z.-P. Jiang, “Reinforcement Learning and Adaptive Optimal Control for Continuous-Time Nonlinear Systems: A Value Iteration Approach,” IEEE Trans. Neural Netw. Learning Syst., pp. 1–10, 2021, doi: 10.1109/TNNLS.2020.3045087.
 [2] T. Bian and Z.-P. Jiang, “Value Iteration, Adaptive Dynamic Programming, and Optimal Control of Nonlinear Systems,” in 2016 IEEE 55th Conference on Decision and Control (CDC), Las Vegas, NV, USA, Dec. 2016, pp. 3375–3380. doi: 10.1109/CDC.2016.7798777.
 """
-struct CTVIADP <: AbstractCTVIADP
+struct CTVIADP <: AbstractEnv  # from FlightSims
     ϕs
     ψs
     K_ϕ
     K_ψ
+    buffer::DataBuffer
+end
+
+function State(env::CTVIADP)
+    @unpack ϕs = env
+    N_ϕ = length(ϕs)
+    return function (w=zeros(N_ϕ))
+        w
+    end
+end
+
+function Dynamics!(env::CTVIADP)
+    error("Complete this")
+    @unpack ϕs, K_ϕ, K_ψ, buffer = env
+    @unpack data_array = buffer
+    ts = data_array |> Map(datum -> datum.t) |> collect
+    xs = data_array |> Map(datum -> datum.x) |> collect
+    Φs = xs |> Map(x ->
+                   (ϕs |> Map(ϕ -> ϕ(x)) |> collect)  # Φ
+                  ) |> collect
+    K_ϕ_inv = inv(K_ϕ)
+    K_ψ_inv = inv(K_ψ)
+    function dynamics!(dw, w, p, t)
+        error("Complete this")
+        # ĉ = K_ψ_inv * integrate(ts, blahblah)  # Eq. (14)
+        # dw .= K_ϕ_inv *   # Eq. (13)
+    end
 end
 
-function value_iteration!(alg::CTVIADP, w)
+function value_iteration!(env::CTVIADP, w)
     error("Complete this")
     error("Add a new stop cond")
 end
+
+function CTVIADP_Simulator(env::CTVIADP)
+    error("Complete this")
+    simulator = Simulator()
+end