diff --git a/Project.toml b/Project.toml index b9f516a1..6974dd45 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GeometricBase = "9a0b12b7-583b-4f04-aa1f-d8551b6addc9" GeometricEquations = "c85262ba-a08a-430a-b926-d29770767bf2" +GeometricProblems = "18cb22b4-ad41-5c80-9c5f-710df63fbdc9" GeometricSolutions = "7843afe4-64f4-4df4-9231-049495c56661" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -20,6 +21,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -48,6 +50,7 @@ HDF5 = "0.16, 0.17" KernelAbstractions = "0.9" LazyArrays = "=2.3.2" NNlib = "0.8, 0.9" +ParameterHandling = "0.5.0" ProgressMeter = "1" SafeTestsets = "0.1" StatsBase = "0.33, 0.34" diff --git a/docs/src/GeometricMachineLearning.bib b/docs/src/GeometricMachineLearning.bib index b9a16fcf..6f5447b1 100644 --- a/docs/src/GeometricMachineLearning.bib +++ b/docs/src/GeometricMachineLearning.bib @@ -751,9 +751,10 @@ @article{bon2024optimal @article{kingma2014adam, title={Adam: a method for stochastic optimization}, - author={Kingma, DP}, + author={Kingma, Diederik P. and Ba, Jimmy Lei}, journal={arXiv preprint arXiv:1412.6980}, - year={2014} + year={2014}, + note={Published as a conference paper at ICLR 2015} } @article{toda1967vibration, @@ -928,6 +929,16 @@ @article{ge1988lie publisher={Elsevier} } +@article{marsden2001discrete, + title={Discrete mechanics and variational integrators}, + author={Marsden, Jerrold E and West, Matthew}, + journal={Acta numerica}, + volume={10}, + pages={357--514}, + year={2001}, + publisher={Cambridge University Press} +} + @article{otto2023learning, title={Learning nonlinear projections for reduced-order modeling of dynamical systems using constrained autoencoders}, author={Otto, Samuel E and Macchio, Gregory R and Rowley, Clarence W}, diff --git a/docs/src/architectures/hamiltonian_neural_network.md b/docs/src/architectures/hamiltonian_neural_network.md index 1b8c68b6..86f86387 100644 --- a/docs/src/architectures/hamiltonian_neural_network.md +++ b/docs/src/architectures/hamiltonian_neural_network.md @@ -42,12 +42,18 @@ Here the derivatives (i.e. vector field data) ``\dot{q}_i^{(t)}`` and ``\dot{p}_ ## Library Functions ```@docs -GeometricMachineLearning.hamiltonian_vector_field(::HamiltonianArchitecture) +GeometricMachineLearning.hamiltonian_vector_field(::StandardHamiltonianArchitecture) GeometricMachineLearning.HamiltonianArchitecture GeometricMachineLearning.StandardHamiltonianArchitecture GeometricMachineLearning.HNNLoss GeometricMachineLearning.symbolic_hamiltonian_vector_field(::GeometricMachineLearning.SymbolicNeuralNetwork) GeometricMachineLearning.SymbolicPullback(::HamiltonianArchitecture) +GeometricMachineLearning.SymbolicEnergy +GeometricMachineLearning.SymbolicPotentialEnergy +GeometricMachineLearning.SymbolicKineticEnergy +GeometricMachineLearning.build_gradient GeometricMachineLearning.GeneralizedHamiltonianArchitecture +GeometricMachineLearning.ForcingLayer +GeometricMachineLearning.ParametricDataLoader GeometricMachineLearning._processing ``` \ No newline at end of file diff --git a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl new file mode 100644 index 00000000..e9ca22ce --- /dev/null +++ b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl @@ -0,0 +1,161 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2, Activation, ParametricLoss, SymbolicNeuralNetwork, SymbolicPullback +using CairoMakie +using NNlib: relu + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π * 20 +nt = 1000 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +""" +Turn a vector of numbers into a vector of `NamedTuple`s to be used by `ParametricDataLoader`. +""" +function turn_parameters_into_correct_format(t::AbstractVector, IC::AbstractVector{<:NamedTuple}) + vec_of_params = NamedTuple[] + for time_step ∈ t + time_step == t[end] || push!(vec_of_params, (t = time_step, )) + end + vcat((vec_of_params for _ in axes(IC, 1))...) +end + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] +p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +""" +This takes time as a single additional parameter (third axis). +""" +function load_time_dependent_harmonic_oscillator_with_parametric_data_loader(qp::QPT{T}, t::AbstractVector{T}, IC::AbstractVector) where {T} + qp_reformatted = turn_q_p_data_into_correct_format(qp) + t_reformatted = turn_parameters_into_correct_format(t, IC) + ParametricDataLoader(qp_reformatted, t_reformatted) +end + +# This sets up the data loader +dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) + +# This sets up the neural network +width::Int = 1 +nhidden::Int = 1 +n_integrators::Int = 2 +# sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) +arch1 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :P) +arch2 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :Q) +arch3 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = 2width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :QP) +nn1 = NeuralNetwork(arch1) +nn2 = NeuralNetwork(arch2) +nn3 = NeuralNetwork(arch3) + +# This is where training starts +batch_size = 128 +n_epochs = 200 +batch = Batch(batch_size) +o1 = Optimizer(AdamOptimizer(), nn1) +o2 = Optimizer(AdamOptimizer(), nn2) +o3 = Optimizer(AdamOptimizer(), nn3) +loss = ParametricLoss() +_pb = SymbolicPullback(nn1, loss, turn_parameters_into_correct_format(t, IC)[1]); +_pb = SymbolicPullback(nn2, loss, turn_parameters_into_correct_format(t, IC)[1]); +_pb = SymbolicPullback(nn3, loss, turn_parameters_into_correct_format(t, IC)[1]); + +function train_network() + o1(nn1, dl, batch, n_epochs, loss, _pb) + o2(nn2, dl, batch, n_epochs, loss, _pb) + o3(nn3, dl, batch, n_epochs, loss, _pb) +end + +loss_array = train_network() + +trajectory_number = 20 + +# Testing the network +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +# note that we have to supply the parameters as a named tuple as well here: +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn3.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn3.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[trajectory_number,:]; label="analytic") \ No newline at end of file diff --git a/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl b/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl new file mode 100644 index 00000000..5b6e0f31 --- /dev/null +++ b/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl @@ -0,0 +1,150 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2, Activation, ParametricLoss, SymbolicNeuralNetwork, SymbolicPullback +using CairoMakie +using NNlib: relu + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = .0 # .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π * 5 +nt = 1000 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +""" +Turn a vector of numbers into a vector of `NamedTuple`s to be used by `ParametricDataLoader`. +""" +function turn_parameters_into_correct_format(t::AbstractVector, IC::AbstractVector{<:NamedTuple}) + vec_of_params = NamedTuple[] + for time_step ∈ t + time_step == t[end] || push!(vec_of_params, (t = time_step, )) + end + vcat((vec_of_params for _ in axes(IC, 1))...) +end + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] +p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +""" +This takes time as a single additional parameter (third axis). +""" +function load_time_dependent_harmonic_oscillator_with_parametric_data_loader(qp::QPT{T}, t::AbstractVector{T}, IC::AbstractVector) where {T} + qp_reformatted = turn_q_p_data_into_correct_format(qp) + t_reformatted = turn_parameters_into_correct_format(t, IC) + ParametricDataLoader(qp_reformatted, t_reformatted) +end + +# This sets up the data loader +dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) + +# This sets up the neural network +width::Int = 2 +n_blocks::Int = 1 +n_integrators::Int = 1 +# sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) +arch = ResNet(2, n_blocks=n_blocks, width=width; activation=tanh, parameters=turn_parameters_into_correct_format(t, IC)[1]) +nn = NeuralNetwork(arch) + +# This is where training starts +batch_size = 128 +n_epochs = 200 +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) +loss = ParametricLoss() + +function train_network() + o(nn, dl, batch, n_epochs, loss) +end + +loss_array = train_network() + +trajectory_number = 20 + +# Testing the network +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +# note that we have to supply the parameters as a named tuple as well here: +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[trajectory_number,:]; label="analytic") \ No newline at end of file diff --git a/scripts/Train_DampedOscillator_QP.jl b/scripts/Train_DampedOscillator_QP.jl new file mode 100644 index 00000000..1058ef94 --- /dev/null +++ b/scripts/Train_DampedOscillator_QP.jl @@ -0,0 +1,114 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2 +using CairoMakie +using JLD2 +using NNlib: relu + +# PARAMETERS +nu = 0.001 # friction force coefficient +ni_dim = 2 # number of initial conditions per dimension (so ni_dim^2 total) +T = 13 +nt = 100 # number of time steps +dt = T/nt # time step +n_epochs = 100000 +n_epochs = 3 +width = 4 # width of the neural network +nhidden = 3 # number of hidden layers in the neural network +batch_size = 5000 # the size of the batch + +path_out = "D:\\RESEARCH - UTWENTE\\GFHNNs\\Damped Oscillator\\network_TEST.jld2" +#path_out = "/home/tyranowskitm/GFHNNs/DampedOscillator/OUTPUTS/network.jld2" + + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + + +# Generating the solution array +ni = ni_dim^2 +omega = sqrt(4-nu^2) / 2 + +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt*range(0,nt,step=1)) + +for i in 1:nt+1 + + for j=1:ni + q[j,i] = (1/omega)*( IC[j].p + nu/2 *IC[j].q )*exp(-nu*t[i]/2)*sin(omega*t[i]) + IC[j].q*exp(-nu*t[i]/2)*cos(omega*t[i]) + p[j,i] = -(1/omega)*( IC[j].q + nu/2 *IC[j].p )*exp(-nu*t[i]/2)*sin(omega*t[i]) + IC[j].p*exp(-nu*t[i]/2)*cos(omega*t[i]) + end + +end + + + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] +p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + + +# This sets up the data loader +dl = DataLoader(turn_q_p_data_into_correct_format((q = q, p = p))) + +# This sets up the neural network +arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, forcing_type = :P) +#arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, activation=(x-> max(0,x)^2/2)) +nn = NeuralNetwork(arch) + +# This is where training starts +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) + +loss_array = o(nn, dl, batch, n_epochs) + + +# Saving the parameters of the network +println("Saving the parameters of the neural network...") +flush(stdout) + +params = GeometricMachineLearning.map_to_cpu(nn.params) + +save(path_out,"parameters", params, "training loss", loss_array, "ni_dim", ni_dim, "T", T, "nt", nt, "n_epochs", n_epochs, "width", width, "nhidden", nhidden, "batch_size", batch_size, "nu", nu) + +println(" ...Done!") +flush(stdout) + + diff --git a/scripts/forcing_layers_parameter_number.jl b/scripts/forcing_layers_parameter_number.jl new file mode 100644 index 00000000..e27376d0 --- /dev/null +++ b/scripts/forcing_layers_parameter_number.jl @@ -0,0 +1,11 @@ +using GeometricMachineLearning +using GeometricMachineLearning: ForcingLayerP, ForcingLayerQP + +forcing_layer_p = ForcingLayerP(2) +forcing_layer_qp = ForcingLayerQP(2) + +nn_p = NeuralNetwork(forcing_layer_p) +nn_qp = NeuralNetwork(forcing_layer_qp) + +println(parameterlength(nn_p)) +println(parameterlength(nn_qp)) \ No newline at end of file diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index bdc2d8e1..bd144423 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -19,9 +19,11 @@ module GeometricMachineLearning import LazyArrays import SymbolicNeuralNetworks import SymbolicNeuralNetworks: input_dimension, output_dimension, SymbolicPullback - using SymbolicNeuralNetworks: derivative, _get_contents, _get_params, SymbolicNeuralNetwork + using SymbolicNeuralNetworks: derivative, _get_contents, _get_params, SymbolicNeuralNetwork, AbstractSymbolicNeuralNetwork using Symbolics: @variables, substitute + import ParameterHandling + import AbstractNeuralNetworks: Architecture, Model, AbstractExplicitLayer, AbstractExplicitCell, AbstractNeuralNetwork , NeuralNetwork, UnknownArchitecture, FeedForwardLoss import AbstractNeuralNetworks: Chain, GridCell import AbstractNeuralNetworks: Dense, Linear, Recurrent @@ -151,8 +153,11 @@ module GeometricMachineLearning include("optimizers/manifold_related/retractions.jl") include("layers/sympnets.jl") + include("layers/forcing_dissipation_layers.jl") include("layers/bias_layer.jl") include("layers/resnet.jl") + include("layers/wide_resnet.jl") + include("layers/parametric_resnet_layer.jl") include("layers/manifold_layer.jl") include("layers/stiefel_layer.jl") include("layers/grassmann_layer.jl") @@ -257,6 +262,7 @@ module GeometricMachineLearning #INCLUDE ARCHITECTURES include("architectures/neural_network_integrator.jl") include("architectures/resnet.jl") + include("architectures/parametric_resnet.jl") include("architectures/transformer_integrator.jl") include("architectures/standard_transformer_integrator.jl") include("architectures/sympnet.jl") @@ -265,6 +271,8 @@ module GeometricMachineLearning include("architectures/psd.jl") include("architectures/fixed_width_network.jl") include("architectures/hamiltonian_neural_network.jl") + include("architectures/standard_hamiltonian_neural_network.jl") + include("architectures/generalized_hamiltonian_neural_network.jl") include("architectures/lagrangian_neural_network.jl") include("architectures/variable_width_network.jl") include("architectures/recurrent_neural_network.jl") @@ -281,7 +289,7 @@ module GeometricMachineLearning export ClassificationTransformer, ClassificationLayer export VolumePreservingFeedForward export SymplecticAutoencoder, PSDArch - export HamiltonianArchitecture, StandardHamiltonianArchitecture, GeneralizedHamiltonianArchitecture + export HamiltonianArchitecture, StandardHamiltonianArchitecture, GeneralizedHamiltonianArchitecture, ForcedGeneralizedHamiltonianArchitecture export solve!, encoder, decoder @@ -299,7 +307,7 @@ module GeometricMachineLearning include("pullbacks/zygote_pullback.jl") include("pullbacks/symbolic_hnn_pullback.jl") - export DataLoader, onehotbatch + export DataLoader, ParametricDataLoader, onehotbatch export Batch, optimize_for_one_epoch! include("data_loader/tensor_assign.jl") include("data_loader/matrix_assign.jl") @@ -307,6 +315,12 @@ module GeometricMachineLearning include("data_loader/batch.jl") include("data_loader/optimize.jl") + include("data_loader/parametric_data_loader.jl") + + include("architectures/forced_sympnet.jl") + include("architectures/forced_generalized_hamiltonian_neural_network.jl") + export ForcedSympNet + export default_optimizer include("optimizers/default_optimizer.jl") @@ -358,8 +372,6 @@ module GeometricMachineLearning include("training/train.jl") - export SymplecticEuler - export SymplecticEulerA, SymplecticEulerB export SEuler, SEulerA, SEulerB include("training_method/symplectic_euler.jl") diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl new file mode 100644 index 00000000..2bb97632 --- /dev/null +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -0,0 +1,35 @@ +const N_FORCING_LAYERS_DEFAULT = 2 + +""" + ForcedGeneralizedHamiltonianArchitecture <: HamiltonianArchitecture + +A version of [`GeneralizedHamiltonianArchitecture`](@ref) that includes forcing/dissipation terms. Also compare this to [`ForcedSympNet`](@ref). +""" +struct ForcedGeneralizedHamiltonianArchitecture{FT, AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + n_forcing_layers::Int + n_integrators::Int + parameters::PT + activation::AT + + function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_forcing_layers=N_FORCING_LAYERS_DEFAULT, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters(), forcing_type::Symbol=:P) + forcing_type == :P || forcing_type == :Q || forcing_type == :QP || error("Forcing has to be either :Q or :P. It is $(forcing_type).") + activation = (typeof(activation) <: Activation) ? activation : Activation(activation) + new{forcing_type, typeof(activation), typeof(parameters)}(dim, width, nhidden, n_forcing_layers, n_integrators, parameters, activation) + end +end + +function Chain(arch::ForcedGeneralizedHamiltonianArchitecture{FT}) where {FT} + layers = () + kinetic_energy = SymbolicKineticEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) + potential_energy = SymbolicPotentialEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) + for i ∈ 1:arch.n_integrators + layers = (layers..., SymplecticEulerA(kinetic_energy; return_parameters = true)) + layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.n_forcing_layers, arch.activation; parameters=arch.parameters, return_parameters=true, type=FT)) + _return_parameters = !(i == arch.n_integrators) + layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = _return_parameters)) + end + Chain(layers...) +end \ No newline at end of file diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl new file mode 100644 index 00000000..582acca4 --- /dev/null +++ b/src/architectures/forced_sympnet.jl @@ -0,0 +1,65 @@ +@doc raw""" + ForcedSympNet <: NeuralNetworkIntegrator + +`ForcedSympNet`s are based on [`SympNet`](@ref)s [jin2020sympnets](@cite) and include [`ForcingLayer`](@ref)s. They are based on [`GSympNet`](@ref)s. + +# Constructor + +```julia +ForcedSympNet(d) +``` + +Make a forced SympNet with dimension ``d.`` + +# Arguments + +Keyword arguments are: +- `upscaling_dimension::Int = 2d`: The *upscaling dimension* of the gradient layer. See the documentation for [`GradientLayerQ`](@ref) and [`GradientLayerP`](@ref) for further explanation. +- `n_layers::Int""" * "$(g_n_layers_default)`" * raw""": The number of layers (i.e. the total number of [`GradientLayerQ`](@ref) and [`GradientLayerP`](@ref)). +- `activation""" * "$(g_activation_default)`" * raw""": The activation function that is applied. +- `init_upper::Bool""" * "$(g_init_upper_default)`" * raw""": Initialize the gradient layer so that it first modifies the $q$-component. +""" +struct ForcedSympNet{FT, AT} <: NeuralNetworkIntegrator + dim::Int + upscaling_dimension::Int + n_layers::Int + n_forcing_layers::Int + act::AT + init_upper::Bool + + function ForcedSympNet(dim::Integer; + upscaling_dimension = 2 * dim, + n_layers = g_n_layers_default, + n_forcing_layers = N_FORCING_LAYERS_DEFAULT, + activation = g_activation_default, + init_upper = g_init_upper_default, + forcing_type::Symbol = :P) + new{forcing_type, typeof(activation)}(dim, upscaling_dimension, n_layers, n_forcing_layers, activation, init_upper) + end + + function ForcedSympNet(dl::DataLoader; + upscaling_dimension = 2 * dl.input_dim, + n_layers = g_n_layers_default, + n_forcing_layers = N_FORCING_LAYERS_DEFAULT, + activation = g_activation_default, + init_upper = g_init_upper_default, + forcing_type::Symbol = :P) + new{forcing_type, typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, n_forcing_layers, activation, init_upper) + end +end + +function Chain(arch::ForcedSympNet{FT}) where {FT} + layers = () + is_upper_criterion = arch.init_upper ? isodd : iseven + for i in 1:arch.n_layers + layers = + if is_upper_criterion(i) + (layers..., GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act)) + else + (layers..., + ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_forcing_layers, arch.act; return_parameters=false, type=FT), + GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act)) + end + end + Chain(layers...) +end \ No newline at end of file diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl new file mode 100644 index 00000000..9e020d02 --- /dev/null +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -0,0 +1,326 @@ +""" + SymbolicEnergy + +See [`SymbolicPotentialEnergy`](@ref) and [`SymbolicKineticEnergy`](@ref). +""" +struct SymbolicEnergy{AT <: Activation, PT, Kinetic} + dim::Int + width::Int + nhidden::Int + parameter_length::Int + parameter_convert::PT + activation::AT + + function SymbolicEnergy(dim, width, nhidden, activation; parameters::OptionalParameters=NullParameters(), type) + @assert iseven(dim) "The input dimension must be an even integer!" + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + _activation = Activation(activation) + new{typeof(_activation), typeof(flattened_parameters[2]), type}(dim, width, nhidden, parameter_length, flattened_parameters[2], _activation) + end +end + +ParameterHandling.flatten(::NullParameters) = ParameterHandling.flatten(NamedTuple()) + +""" + SymbolicPotentialEnergy + +A `const` derived from [`SymbolicEnergy`](@ref). + +# Constructors + +```jldoctest; setup=:(using GeometricMachineLearning; using GeometricMachineLearning: Activation) +julia> params, dim, width, nhidden, activation = (m = 1., ω = π / 2), 2, 2, 1, tanh +((m = 1.0, ω = 1.5707963267948966), 2, 2, 1, tanh) + +julia> se = GeometricMachineLearning.SymbolicPotentialEnergy(dim, width, nhidden, activation; parameters = params); + +``` + +In practice we use `SymbolicPotentialEnergy` (and [`SymbolicKineticEnergy`](@ref)) together with [`build_gradient(::SymbolicEnergy)`](@ref). + +# Parameter Dependence +""" +const SymbolicPotentialEnergy{AT, PT} = SymbolicEnergy{AT, PT, :potential} + +""" + SymbolicKineticEnergy + +A `const` derived from [`SymbolicEnergy`](@ref). + +# Constructors + +See [`SymbolicPotentialEnergy`](@ref). +""" +const SymbolicKineticEnergy{AT, PT} = SymbolicEnergy{AT, PT, :kinetic} + +SymbolicPotentialEnergy(args...; kwargs...) = SymbolicEnergy(args...; type = :potential, kwargs...) +SymbolicKineticEnergy(args...; kwargs...) = SymbolicEnergy(args...; type = :kinetic, kwargs...) + +function Chain(se::SymbolicEnergy) + inner_layers = Tuple( + [Dense(se.width, se.width, se.activation) for _ in 1:se.nhidden] + ) + + Chain( + Dense(se.dim÷2 + se.parameter_length, se.width, se.activation), + inner_layers..., + Linear(se.width, 1; use_bias = false) + ) +end + +function SymbolicNeuralNetworks.Jacobian(f::SymbolicNeuralNetworks.EqT, nn::AbstractSymbolicNeuralNetwork, dim2::Integer) + # make differential of input variables (not of parameters) + Dx = SymbolicNeuralNetworks.symbolic_differentials(nn.input)[1:dim2] + + # Evaluation of gradient + s∇f = hcat([SymbolicNeuralNetworks.expand_derivatives.(SymbolicNeuralNetworks.Symbolics.scalarize(dx(f))) for dx in Dx]...) + + SymbolicNeuralNetworks.Jacobian(f, s∇f, nn) +end + +function SymbolicNeuralNetworks.Jacobian(nn::AbstractSymbolicNeuralNetwork, dim2::Integer) + + # Evaluation of the symbolic output + soutput = nn.model(nn.input, params(nn)) + + SymbolicNeuralNetworks.Jacobian(soutput, nn, dim2) +end + +""" + build_gradient(se) + +Build a gradient function from a [`SymbolicEnergy`](@ref) `se`. + +# Examples + +```jldoctest; setup=:(using GeometricMachineLearning; using GeometricMachineLearning: SymbolicPotentialEnergy, build_gradient, concatenate_array_with_parameters; using GeometricMachineLearning.GeometricBase: OptionalParameters; using Random; Random.seed!(123)) +params, dim = (m = 1., ω = π / 2), 4 + +se = SymbolicPotentialEnergy(dim; parameters = params) + +network_params = NeuralNetwork(Chain(se)).params + +built_grad = build_gradient(se) +grad(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) = built_grad(concatenate_array_with_parameters(qp, problem_params), params) + +grad(rand(2), params, network_params) + +# output + +2×1 Matrix{Float64}: + 0.069809944230798 + 0.30150270218327446 +``` +""" +function build_gradient(se::SymbolicEnergy) + model = Chain(se) + nn = SymbolicNeuralNetwork(model) + □ = SymbolicNeuralNetworks.Jacobian(nn, se.dim÷2) + SymbolicNeuralNetworks.build_nn_function(SymbolicNeuralNetworks.derivative(□)', nn.params, nn.input) +end + +struct SymplecticEuler{M, N, FT<:Base.Callable, MT<:Chain, type, ReturnParameters} <: AbstractExplicitLayer{M, N} + gradient_function::FT + energy_model::MT +end + +function parameterlength(integrator::SymplecticEuler) + parameterlength(integrator.energy_model) +end + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::SymplecticEuler, backend::KernelAbstractions.Backend, ::Type{T}) where {T} + initialparameters(rng, init_weight, integrator.energy_model, backend, T) +end + +const SymplecticEulerA{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :A, ReturnParameters} +const SymplecticEulerB{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :B, ReturnParameters} + +""" +Changes ``q`` (based on the kinetic energy). +""" +function SymplecticEulerA(se::SymbolicKineticEnergy; return_parameters::Bool) + gradient_function = build_gradient(se) + c = Chain(se) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :A, return_parameters}(gradient_function, c) +end + +""" +Changes ``p`` (based on the potential energy). +""" +function SymplecticEulerB(se::SymbolicPotentialEnergy; return_parameters::Bool) + gradient_function = build_gradient(se) + c = Chain(se) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :B, return_parameters}(gradient_function, c) +end + +function concatenate_array_with_parameters(qp::AbstractVector, params::OptionalParameters) + vcat(qp, ParameterHandling.flatten(params)[1]) +end + +function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) + @assert size(qp, 2) == 1 + vcat(qp, repeat(ParameterHandling.flatten(params)[1], 1, size(qp, 2))) +end + +function concatenate_array_with_parameters(qp::AbstractArray{T, 3}, params::AbstractVector) where {T} + @assert size(qp, 3) == length(params) + matrices = Tuple(concatenate_array_with_parameters(qp[:, :, i], params[i]) for i in axes(qp, 3)) + cat(matrices...; dims = 3) +end + +function concatenate_array_with_parameters(qp::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}, params::NamedTuple) + SymbolicNeuralNetworks.Symbolics.Arr(vcat(qp, values(params)...)) +end + +function concatenate_array_with_parameters(qp::AbstractArray{SymbolicNeuralNetworks.Symbolics.Num, 1}, params::NamedTuple) + concatenate_array_with_parameters(SymbolicNeuralNetworks.Symbolics.Arr(qp), params) +end + +Base.eltype(::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num}) = SymbolicNeuralNetworks.Symbolics.Num + +# function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) +# hcat((concatenate_array_with_parameters(qp[:, i], params) for i in axes(qp, 2))...) +# end + +function concatenate_array_with_parameters(qp::AbstractMatrix, params::AbstractVector) + @assert _size(qp, 2) == length(params) + vcat((concatenate_array_with_parameters(@view(qp[:, i]), params[i]) for i in axes(params, 1))...) +end + +concatenate_array_with_parameters(a::AbstractArray{T, 3}, ::GeometricBase.NullParameters) where {T} = a + +function (integrator::SymplecticEulerA{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + (q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p) +end + +function (integrator::SymplecticEulerB{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + (q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])) +end + +function (integrator::SymplecticEulerA{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + ((q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p), problem_params) +end + +function (integrator::SymplecticEulerB{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + ((q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])), problem_params) +end + +function (integrator::SymplecticEuler)(qp_params::Tuple{<:QPTOAT2, <:OptionalParameters}, params::NeuralNetworkParameters) + integrator(qp_params..., params) +end + +function (integrator::SymplecticEuler)(::TT, ::NeuralNetworkParameters) where {TT <: Tuple} + error("The input is of type $(TT). This shouldn't be the case!") +end + +function (integrator::SymplecticEuler{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params)[1] + (vcat(evaluated.q, evaluated.p), problem_params) +end + +function (integrator::SymplecticEuler{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params) + vcat(evaluated.q, evaluated.p) +end + +(integrator::SymplecticEuler)(qp::QPTOAT2, params::NeuralNetworkParameters) = integrator(qp, NullParameters(), params) + +""" + GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture + +A realization of generalized Hamiltonian neural networks (GHNNs) as introduced in [horn2025generalized](@cite). + +Also see [`StandardHamiltonianArchitecture`](@ref). + +# Constructor + +The constructor takes the following input arguments: +1. `dim`: system dimension, +2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, +3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, +4. `n_integrators`: the number of integrators used in the GHNN. +5. `activation = $(HNN_activation_default)`: the activation function used in the GHNN, +""" +struct GeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + n_integrators::Int + parameters::PT + activation::AT + + function GeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters()) + activation = (typeof(activation) <: Activation) ? activation : Activation(activation) + new{typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) + end +end + +@generated function AbstractNeuralNetworks.applychain(layers::Tuple, x::Tuple{<:QPTOAT2, <:OptionalParameters}, ps::Tuple) + N = length(fieldtypes((layers))) + x_symbols = vcat([:x], [gensym() for _ in 1:N]) + calls = [:(($(x_symbols[i + 1])) = layers[$i]($(x_symbols[i]), ps[$i])) for i in 1:N] + push!(calls, :(return $(x_symbols[N + 1]))) + return Expr(:block, calls...) +end + +index_qpt(qp::QPT2{T, 2}, i, j) where {T} = (q = qp.q[i, j], p = qp.p[i, j]) +index_gpt(qp::QPT2{T, 3}, i, j, k) where {T} = (q = qp.q[i, j, k], p = qp.p[i, j, k]) + +function Chain(ghnn_arch::GeneralizedHamiltonianArchitecture) + c = () + kinetic_energy = SymbolicKineticEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) + potential_energy = SymbolicPotentialEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) + + for n in 1:ghnn_arch.n_integrators + c = (c..., SymplecticEulerA(kinetic_energy; return_parameters = true)) + c = n == ghnn_arch.n_integrators ? (c..., SymplecticEulerB(potential_energy; return_parameters=false)) : (c..., SymplecticEulerB(potential_energy; return_parameters=true)) + end + + Chain(c...) +end + +function (nn::NeuralNetwork{GT})(qp::QPTOAT2, problem_params::OptionalParameters) where {GT <: GeneralizedHamiltonianArchitecture} + nn.model(qp, problem_params, params(nn)) +end + +function (model::Chain)(qp::QPTOAT2, problem_params::OptionalParameters, params::Union{NeuralNetworkParameters, NamedTuple}) + model((qp, problem_params), params) +end + +function (c::Chain)(qp::QPT2{T, 3}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters})::QPT2{T} where {T} + @assert size(qp.q, 3) == length(system_params) + @assert size(qp.q, 2) == 1 + output_vectorwise = [c(index_gpt(qp, :, 1, i), system_params[i], ps) for i in axes(system_params, 1)] + q_output = hcat([single_output_vectorwise.q for single_output_vectorwise ∈ output_vectorwise]...) + p_output = hcat([single_output_vectorwise.p for single_output_vectorwise ∈ output_vectorwise]...) + (q = reshape(q_output, size(q_output, 1), 1, size(q_output, 2)), p = reshape(p_output, size(p_output, 1), 1, size(p_output, 2))) +end + +function (c::Chain)(qp::AbstractArray{T, 2}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} + @assert _size(qp, 2) == length(system_params) + qp_reshaped = reshape(qp, size(qp, 1), 1, length(system_params)) + c(qp_reshaped, system_params, ps) +end + +function (c::Chain)(qp::AbstractArray{T, 3}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} + @assert size(qp, 3) == length(system_params) + @assert size(qp, 2) == 1 + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + c_output = c(qp_split, system_params, ps)::QPT + reshape(vcat(c_output.q, c_output.p), 2n, length(system_params)) +end + +AbstractNeuralNetworks.networkbackend(::LazyArrays.ApplyArray) = AbstractNeuralNetworks.CPU() \ No newline at end of file diff --git a/src/architectures/hamiltonian_neural_network.jl b/src/architectures/hamiltonian_neural_network.jl index b3077952..00c85405 100644 --- a/src/architectures/hamiltonian_neural_network.jl +++ b/src/architectures/hamiltonian_neural_network.jl @@ -12,111 +12,3 @@ function HamiltonianArchitecture(dim::Integer, width::Integer, nhidden::Integer, @warn "You called the abstract type `HamiltonianArchitecture` as a constructor. This is defaulting to `StandardHamiltonianArchitecture`." StandardHamiltonianArchitecture(dim, width, nhidden, activation) end - -""" - StandardHamiltonianArchitecture <: HamiltonianArchitecture - -A realization of the standard Hamiltonian neural network (HNN) [greydanus2019hamiltonian](@cite). - -Also see [`GeneralizedHamiltonianArchitecture`](@ref). - -# Constructor - -The constructor takes the following input arguments: -1. `dim`: system dimension, -2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, -3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, -4. `activation = $(HNN_activation_default)`: the activation function used in the HNN. -""" -struct StandardHamiltonianArchitecture{AT} <: HamiltonianArchitecture{AT} - dim::Int - width::Int - nhidden::Int - activation::AT - - function StandardHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default) - new{typeof(activation)}(dim, width, nhidden, activation) - end -end - -GHNN_integrator_default = nothing - -""" - GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture - -A realization of generalized Hamiltonian neural networks (GHNNs) as introduced in [horn2025generalized](@cite). - -Also see [`StandardHamiltonianArchitecture`](@ref). - -# Constructor - -The constructor takes the following input arguments: -1. `dim`: system dimension, -2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, -3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, -4. `activation = $(HNN_activation_default)`: the activation function used in the GHNN, -5. `integrator = $(GHNN_integrator_default)`: the integrator that is used to design the GHNN. -""" -struct GeneralizedHamiltonianArchitecture{AT, IT} <: HamiltonianArchitecture{AT} - dim::Int - width::Int - nhidden::Int - activation::AT - integrator::IT - - function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default, integrator=GHNN_integrator_default) - error("GHNN still has to be implemented!") - new{typeof(activation), typeof(integrator)}(dim, width, nhidden, activation, integrator) - end -end - -@inline AbstractNeuralNetworks.dim(arch::HamiltonianArchitecture) = arch.dim - -""" - symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) - -Get the symbolic expression for the vector field belonging to the HNN `nn`. - -# Implementation - -This is calling `SymbolicNeuralNetworks.Jacobian` and then multiplies the result with a Poisson tensor. -""" -function symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) - □ = SymbolicNeuralNetworks.Jacobian(nn) - input_dim = input_dimension(nn.model) - n = input_dim ÷ 2 - # placeholder for one - @variables o - o_vec = repeat([o], n) - 𝕀 = Diagonal(o_vec) - 𝕆 = zero(𝕀) - 𝕁 = hcat(vcat(𝕆, -𝕀), vcat(𝕀, 𝕆)) - substitute(𝕁 * derivative(□)', Dict(o => 1, )) -end - -""" - hamiltonian_vector_field(arch::HamiltonianArchitecture) - -Compute an executable expression of the Hamiltonian vector field of a [`HamiltonianArchitecture`](@ref). - -# Implementation - -This first computes a symbolic expression of the vector field using [`symbolic_hamiltonian_vector_field`](@ref). -""" -function hamiltonian_vector_field(arch::HamiltonianArchitecture) - nn = SymbolicNeuralNetwork(arch) - hvf = symbolic_hamiltonian_vector_field(nn) - SymbolicNeuralNetworks.build_nn_function(hvf, nn.params, nn.input) -end - -function Chain(arch::HamiltonianArchitecture) - inner_layers = Tuple( - [Dense(arch.width, arch.width, arch.activation) for _ in 1:arch.nhidden] - ) - - Chain( - Dense(arch.dim, arch.width, arch.activation), - inner_layers..., - Linear(arch.width, 1; use_bias = false) - ) -end \ No newline at end of file diff --git a/src/architectures/parametric_resnet.jl b/src/architectures/parametric_resnet.jl new file mode 100644 index 00000000..5652ef4c --- /dev/null +++ b/src/architectures/parametric_resnet.jl @@ -0,0 +1,37 @@ +struct ParametricResNet{AT <: Activation, PT <: OptionalParameters} <: NeuralNetworkIntegrator + sys_dim::Int + n_blocks::Int + width::Int + parameters::PT + activation::AT + + function ParametricResNet(dim; width=dim, n_blocks = HNN_nhidden_default, activation=HNN_activation_default, parameters=NullParameters()) + activation = (typeof(activation) <: Activation) ? activation : Activation(activation) + new{typeof(activation), typeof(parameters)}(dim, n_blocks, width, parameters, activation) + end +end + +function ParametricResNet(dl::DataLoader, n_blocks::Integer, width::Integer=dl.input_dim; activation=HNN_activation_default, parameters=NullParameters()) + ParametricResNet(dl.input_dim; width=width, n_blocks=n_blocks, activation) +end + +function ResNet(input_dim::Integer, n_blocks::Integer, width::Integer=input_dim; activation=HNN_activation_default, parameters=NullParameters()) + typeof(parameters) <: NullParameters ? ResNet(input_dim, n_blocks, width, activation) : ParametricResNet(input_dim; n_blocks=n_blocks, width=width, parameters=parameters, activation=activation) +end + +function ResNet(input_dim::Integer; n_blocks::Integer, width::Integer=input_dim, activation=HNN_activation_default, parameters=NullParameters()) + ResNet(input_dim, n_blocks, width; activation=activation, parameters=parameters) +end + +function Chain(arch::ParametricResNet{AT}) where AT + layers = () + for _ in 1:arch.n_blocks + # nonlinear layers + layers = (layers..., ParametricResNetLayer(arch.sys_dim, arch.width, arch.activation; parameters=arch.parameters, return_parameters=true)) + end + + # linear layers for the output + layers = (layers..., ParametricResNetLayer(arch.sys_dim, arch.width, identity; parameters=arch.parameters, return_parameters=false)) + + Chain(layers...) +end \ No newline at end of file diff --git a/src/architectures/resnet.jl b/src/architectures/resnet.jl index cc8e9851..bc8829b6 100644 --- a/src/architectures/resnet.jl +++ b/src/architectures/resnet.jl @@ -23,20 +23,23 @@ where `dl` is an instance of `DataLoader`. See [`iterate`](@ref) for an example of this. """ struct ResNet{AT} <: NeuralNetworkIntegrator - sys_dim::Int - n_blocks::Int + sys_dim::Int + n_blocks::Int + width::Int activation::AT end -function ResNet(dl::DataLoader, n_blocks::Integer; activation = tanh) - ResNet(dl.input_dim, n_blocks, activation) +ResNet(sys_dim::Integer, n_blocks::Integer, activation) = ResNet(sys_dim, n_blocks, sys_dim, activation) + +function ResNet(dl::DataLoader, n_blocks::Integer, width::Integer=dl.input_dim; activation = tanh) + ResNet(dl.input_dim, n_blocks, width, activation) end function Chain(arch::ResNet{AT}) where AT layers = () for _ in 1:arch.n_blocks # nonlinear layers - layers = (layers..., ResNetLayer(arch.sys_dim, arch.activation; use_bias=true)) + layers = (layers..., arch.sys_dim == arch.width ? ResNetLayer(arch.sys_dim, arch.activation; use_bias=true) : WideResNetLayer(arch.sys_dim, arch.width, arch.activation)) end # linear layers for the output diff --git a/src/architectures/standard_hamiltonian_neural_network.jl b/src/architectures/standard_hamiltonian_neural_network.jl new file mode 100644 index 00000000..920cee8b --- /dev/null +++ b/src/architectures/standard_hamiltonian_neural_network.jl @@ -0,0 +1,77 @@ +""" + StandardHamiltonianArchitecture <: HamiltonianArchitecture + +A realization of the standard Hamiltonian neural network (HNN) [greydanus2019hamiltonian](@cite). + +Also see [`GeneralizedHamiltonianArchitecture`](@ref). + +# Constructor + +The constructor takes the following input arguments: +1. `dim`: system dimension, +2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, +3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, +4. `activation = $(HNN_activation_default)`: the activation function used in the HNN. +""" +struct StandardHamiltonianArchitecture{AT} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + activation::AT + + function StandardHamiltonianArchitecture(dim::Integer, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default) + @assert iseven(dim) "The input dimension must be an even integer" + new{typeof(activation)}(dim, width, nhidden, activation) + end +end + +@inline AbstractNeuralNetworks.dim(arch::HamiltonianArchitecture) = arch.dim + +""" + symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) + +Get the symbolic expression for the vector field belonging to the HNN `nn`. + +# Implementation + +This is calling `SymbolicNeuralNetworks.Jacobian` and then multiplies the result with a Poisson tensor. +""" +function symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) + □ = SymbolicNeuralNetworks.Jacobian(nn) + input_dim = input_dimension(nn.model) + n = input_dim ÷ 2 + # placeholder for one + @variables o + o_vec = repeat([o], n) + 𝕀 = Diagonal(o_vec) + 𝕆 = zero(𝕀) + 𝕁 = hcat(vcat(𝕆, -𝕀), vcat(𝕀, 𝕆)) + substitute(𝕁 * derivative(□)', Dict(o => 1, )) +end + +""" + hamiltonian_vector_field(arch::HamiltonianArchitecture) + +Compute an executable expression of the Hamiltonian vector field of a [`HamiltonianArchitecture`](@ref). + +# Implementation + +This first computes a symbolic expression of the vector field using [`symbolic_hamiltonian_vector_field`](@ref). +""" +function hamiltonian_vector_field(arch::StandardHamiltonianArchitecture) + nn = SymbolicNeuralNetwork(arch) + hvf = symbolic_hamiltonian_vector_field(nn) + SymbolicNeuralNetworks.build_nn_function(hvf, nn.params, nn.input) +end + +function Chain(arch::StandardHamiltonianArchitecture) + inner_layers = Tuple( + [Dense(arch.width, arch.width, arch.activation) for _ in 1:arch.nhidden] + ) + + Chain( + Dense(arch.dim, arch.width, arch.activation), + inner_layers..., + Linear(arch.width, 1; use_bias = false) + ) +end \ No newline at end of file diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index a575ab31..d41ba001 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -140,18 +140,19 @@ function number_of_batches(dl::DataLoader{T, AT, OT, :RegularData}, batch::Batch Int(ceil(dl.input_time_steps * dl.n_params / batch.batch_size)) end -function batch_over_two_axes(batch::Batch, number_columns::Int, third_dim::Int, dl::DataLoader) +function batch_over_two_axes(batch::Batch, number_columns::Integer, third_dim::Integer, n_batches::Integer) time_indices = shuffle(1:number_columns) parameter_indices = shuffle(1:third_dim) complete_indices = Iterators.product(time_indices, parameter_indices) |> collect |> vec batches = () - n_batches = number_of_batches(dl, batch) for batch_number in 1:(n_batches - 1) batches = (batches..., complete_indices[(batch_number - 1) * batch.batch_size + 1 : batch_number * batch.batch_size]) end (batches..., complete_indices[(n_batches - 1) * batch.batch_size + 1:end]) end +batch_over_two_axes(batch::Batch, number_of_columns::Integer, third_dim::Integer, dl::DataLoader) = batch_over_two_axes(batch, number_of_columns, third_dim, number_of_batches(dl, batch)) + function (batch::Batch)(dl::DataLoader{T, BT, OT, :RegularData}) where {T, AT<:AbstractArray{T, 3}, BT<:Union{AT, NamedTuple{(:q, :p), Tuple{AT, AT}}}, OT} batch_over_two_axes(batch, dl.input_time_steps, dl.n_params, dl) end @@ -186,7 +187,7 @@ end output[i, j, k] = data[i, indices[1, k] + seq_length + j - 1, indices[2, k]] end -# this is neeced if we want to use the vector of tuples in a kernel +# this is needed if we want to use the vector of tuples in a kernel function convert_vector_of_tuples_to_matrix(backend::Backend, batch_indices_tuple::Vector{Tuple{Int, Int}}) _batch_size = length(batch_indices_tuple) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 70e39ea7..2d012505 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -145,7 +145,7 @@ function DataLoader(data::AbstractMatrix{T}; autoencoder=true, suppress_info = f input_dim, time_steps = size(data) reshaped_data = reshape(data, input_dim, time_steps, 1) return DataLoader{T, typeof(reshaped_data), Nothing, :TimeSeries}(reshaped_data, nothing, input_dim, time_steps, 1, nothing, nothing) - elseif autoencoder == true + elseif autoencoder == true input_dim, n_params = size(data) reshaped_data = reshape(data, input_dim, 1, n_params) return DataLoader{T, typeof(reshaped_data), Nothing, :RegularData}(reshaped_data, nothing, input_dim, 1, n_params, nothing, nothing) diff --git a/src/data_loader/optimize.jl b/src/data_loader/optimize.jl index 4e41b228..5a9cb007 100644 --- a/src/data_loader/optimize.jl +++ b/src/data_loader/optimize.jl @@ -86,8 +86,8 @@ _copy(qp::QPT) = (q = copy(qp.q), p = copy(qp.p)) _copy(t::Tuple{<:QPTOAT, <:QPTOAT}) = _copy.(t) function (o::Optimizer)(nn::NeuralNetwork, - dl::DataLoader, - batch::Batch, + dl, + batch::Batch, n_epochs::Integer, loss::NetworkLoss, _pullback::AbstractPullback = ZygotePullback(loss); show_progress = true) diff --git a/src/data_loader/parametric_data_loader.jl b/src/data_loader/parametric_data_loader.jl new file mode 100644 index 00000000..2c03049c --- /dev/null +++ b/src/data_loader/parametric_data_loader.jl @@ -0,0 +1,140 @@ +""" + ParametricDataLoader + +Very similar to [`DataLoader`](@ref), but can deal with parametric problems. +""" +struct ParametricDataLoader{T, AT<:QPTOAT2, VT<:AbstractVector} + input::AT + input_dim::Int + input_time_steps::Int + parameters::VT + n_params::Int + + function ParametricDataLoader(data::QPTOAT2{T, 3}, parameters::AbstractVector) where {T} + input_dim, input_time_steps, n_params = _size(data) + @assert T == _eltype(parameters) "Provided data and parameters must have the same eltype!" + @assert length(parameters) == _size(data, 3) "The number of provided parameters and the parameter axis of the supplied data do not have the same length!" + + new{T, typeof(data), typeof(parameters)}(data, input_dim, input_time_steps, parameters, n_params) + end +end + +function ParametricDataLoader(input::AbstractMatrix{T}, parameters::AbstractVector) where {T} + ParametricDataLoader(reshape(input, size(input)..., 1), parameters) +end + +function ParametricDataLoader(ensemble_solution::EnsembleSolution{T, T1, Vector{ST}}) where {T, + T1, + DT <: DataSeries{T}, + ST <: GeometricSolution{T, T1, NamedTuple{(:q, :p), Tuple{DT, DT}}} + } + + sys_dim = length(ensemble_solution.s[1].q[0]) + input_time_steps = length(ensemble_solution.t) + n_params = length(ensemble_solution.s) + params = ensemble_solution.problem.parameters + + data = (q = zeros(T, sys_dim, input_time_steps, n_params), p = zeros(T, sys_dim, input_time_steps, n_params)) + + for (solution, i) in zip(ensemble_solution.s, axes(ensemble_solution.s, 1)) + for dim in 1:sys_dim + data.q[dim, :, i] = solution.q[:, dim] + data.p[dim, :, i] = solution.p[:, dim] + end + end + + ParametricDataLoader(data, params) +end + +# """ +# rearrange_parameters(parameters) +# +# Rearrange `parameters` such that they can be used by [`ParametricDataLoader`](@ref). +# """ +# function rearrange_parameters(parameters::Vector{<:NamedTuple}) +# parameters_rearranged = zeros(_eltype(parameters), ) +# end + +# function batch_over_two_axes(batch::Batch, number_columns::Int, third_dim::Int, dl::ParametricDataLoader) +# time_indices = shuffle(1:number_columns) +# parameter_indices = shuffle(1:third_dim) +# complete_indices = Iterators.product(time_indices, parameter_indices) |> collect |> vec +# batches = () +# n_batches = number_of_batches(dl, batch) +# for batch_number in 1:(n_batches - 1) +# batches = (batches..., complete_indices[(batch_number - 1) * batch.batch_size + 1 : batch_number * batch.batch_size]) +# end +# (batches..., complete_indices[(n_batches - 1) * batch.batch_size + 1:end]) +# end + +function optimize_for_one_epoch!( opt::Optimizer, + model, + ps::Union{NeuralNetworkParameters, NamedTuple}, + dl::ParametricDataLoader{T}, + batch::Batch, + _pullback::AbstractPullback, + λY) where T + count = 0 + total_error = T(0) + batches = batch(dl) + for batch_indices in batches + count += 1 + # these `copy`s should not be necessary! coming from a Zygote problem! + _input_nt_output_nt_parameter_indices = convert_input_and_batch_indices_to_array(dl, batch, batch_indices) + # input_nt_output_nt = _input_nt_output_nt_parameter_indices[1:2] + loss_value, pullback = _pullback(ps, model, _input_nt_output_nt_parameter_indices) + total_error += loss_value + dp = _get_params(_unpack_tuple(pullback(one(loss_value)))) + optimization_step!(opt, λY, ps, dp) + end + total_error / count +end + +function parameter_indices(parameters::AbstractVector, parameter_indices::AbstractVector{Int}) + [parameters[parameter_index] for parameter_index in parameter_indices] +end + +function parameter_indices(parameters::AbstractVector, batch_indices::AbstractMatrix{Int}) + parameter_indices(parameters, batch_indices[2, :]) +end + +function parameter_indices(dl::ParametricDataLoader, indices::AbstractArray{Int}) + parameter_indices(dl.parameters, indices) +end + +function convert_input_and_batch_indices_to_array(dl::ParametricDataLoader{T, BT}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}} + backend = networkbackend(dl.input.q) + + # the batch size is smaller for the last batch + _batch_size = length(batch_indices_tuple) + + batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple) + + q_input = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.seq_length, _batch_size) + p_input = similar(q_input) + + assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend) + assign_input_from_vector_of_tuples!(q_input, p_input, dl.input, batch_indices, ndrange=(dl.input_dim ÷ 2, batch.seq_length, _batch_size)) + + q_output = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.prediction_window, _batch_size) + p_output = similar(q_output) + + assign_output_from_vector_of_tuples! = assign_output_from_vector_of_tuples_kernel!(backend) + assign_output_from_vector_of_tuples!(q_output, p_output, dl.input, batch_indices, batch.seq_length, ndrange=(dl.input_dim ÷ 2, batch.prediction_window, _batch_size)) + + (q = q_input, p = p_input), (q = q_output, p = p_output), parameter_indices(dl, batch_indices) +end + +function number_of_batches(dl::ParametricDataLoader, batch::Batch) + @assert dl.input_time_steps ≥ (batch.seq_length + batch.prediction_window) "The number of time steps has to be greater than sequence length + prediction window." + Int(ceil((dl.input_time_steps - (batch.seq_length - 1) - batch.prediction_window) * dl.n_params / batch.batch_size)) +end + +function (batch::Batch)(dl::ParametricDataLoader) + batch_over_two_axes(batch, dl.input_time_steps - (batch.seq_length - 1) - batch.prediction_window, dl.n_params, number_of_batches(dl, batch)) +end + +function (o::Optimizer)(nn::NeuralNetwork{<:GeneralizedHamiltonianArchitecture}, dl::ParametricDataLoader, batch::Batch{:FeedForward}, n_epochs::Integer=1, loss::NetworkLoss=ParametricLoss(); kwargs...) + _pullback::AbstractPullback = ZygotePullback(loss) + o(nn, dl, batch, n_epochs, loss, _pullback; kwargs...) +end \ No newline at end of file diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl new file mode 100644 index 00000000..c816a1eb --- /dev/null +++ b/src/layers/forcing_dissipation_layers.jl @@ -0,0 +1,162 @@ +@doc raw""" + ForcingLayer <: AbstractExplicitLayer + +Layers that can learn dissipative or forcing terms, but not conservative ones. + +Use the constructors [`ForcingLayerQ`](@ref) and [`ForcingLayerP`](@ref) for this. + +!!! warn + The forcing is dependent on either ``q`` or ``p``, but always applied to the ``p`` component. + +The forcing layers are inspired by the Lagrange-d'Alembert integrator from [marsden2001discrete; Example 3.2.2](@cite): + +```math +\begin{aligned} + q^{(t+1)} = & q^{(t)} + & hM^{-1}p^{(t)}, \\ + p^{(t+1)} = & p^{(t)} + & -h\nabla{}U(q^{(t+1)}) + hf_H(q^{(t+1)}, p^{(t)}), +\end{aligned} +``` +for a separable Hamiltonian ``H(q, p) = T(p) + U(q) = p^TM^{-1}p + U(q)`` and external forcing ``f_H.`` +""" +struct ForcingLayer{M, N, PT<:Base.Callable, CT, type, ReturnParameters} <: AbstractExplicitLayer{M, N} + dim::Int + width::Int + nhidden::Int + parameter_length::Int + parameter_convert::PT + model::CT +end + +parameterlength(l::ForcingLayer) = parameterlength(l.model) + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::ForcingLayer, backend::KernelAbstractions.Backend, ::Type{T}) where {T} + initialparameters(rng, init_weight, integrator.model, backend, T) +end + +""" + ForcingLayerQ + +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``q`` component. +""" +const ForcingLayerQ{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :Q, ReturnParameters} + +""" + ForcingLayerP + +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``p`` component. +""" +const ForcingLayerP{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :P, ReturnParameters} + +""" + ForcingLayerQP + +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``q`` and the ``p`` component. +""" +const ForcingLayerQP{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :QP, ReturnParameters} + +function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_length::Integer, activation, type::Symbol) + inner_layers = Tuple( + [Dense(width, width, activation) for _ in 1:nhidden] + ) + + Chain( + type == :QP ? Dense(dim + parameter_length, width, activation) : Dense(dim÷2 + parameter_length, width, activation), + inner_layers..., + Linear(width, dim÷2; use_bias = false) + ) +end + +function ForcingLayer(dim::Integer, width::Integer, nhidden::Integer, activation; parameters::OptionalParameters=NullParameters(), return_parameters::Bool, type::Symbol) + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + c = build_chain(dim, width, nhidden, parameter_length, activation, type) + ForcingLayer{dim, dim, typeof(flattened_parameters[2]), typeof(c), type, return_parameters}(dim, width, nhidden, parameter_length, flattened_parameters[2], c) +end + +""" + ForcingLayerQ(dim) + +# Examples + +```julia +ForcingLayerQ(dim, width, nhidden, activation; parameters, return_parameters) +``` +""" +function ForcingLayerQ(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:Q) +end + +""" + ForcingLayerP(dim) + +See [`ForcingLayerQ`](@ref). + +# Examples + +```julia +ForcingLayerP(dim, width, nhidden, activation; parameters, return_parameters) +``` +""" +function ForcingLayerP(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:P) +end + +function ForcingLayerQP(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:QP) +end + +function (integrator::ForcingLayerQ{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + (q = qp.q, p = qp.p + integrator.model(input, params)) +end + +function (integrator::ForcingLayerP{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + (q = qp.q, p = qp.p + integrator.model(input, params)) +end + +function (integrator::ForcingLayerQP{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(vcat(qp.q, qp.p), problem_params) + (q = qp.q, p = qp.p + integrator.model(input, params)) +end + +function (integrator::ForcingLayerQ{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) +end + +function (integrator::ForcingLayerP{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) +end + +function (integrator::ForcingLayerQP{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(vcat(qp.q, qp.p), problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) +end + +function (integrator::ForcingLayer)(qp_params::Tuple{<:QPTOAT2, <:OptionalParameters}, params::NeuralNetworkParameters) + integrator(qp_params..., params) +end + +function (integrator::ForcingLayer)(::TT, ::NeuralNetworkParameters) where {TT <: Tuple} + error("The input is of type $(TT). This shouldn't be the case!") +end + +function (integrator::ForcingLayer{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params)[1] + (vcat(evaluated.q, evaluated.p), problem_params) +end + +function (integrator::ForcingLayer{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params) + vcat(evaluated.q, evaluated.p) +end + +(integrator::ForcingLayer)(qp::QPTOAT2, params::NeuralNetworkParameters) = integrator(qp, NullParameters(), params) \ No newline at end of file diff --git a/src/layers/parametric_resnet_layer.jl b/src/layers/parametric_resnet_layer.jl new file mode 100644 index 00000000..6f97e1eb --- /dev/null +++ b/src/layers/parametric_resnet_layer.jl @@ -0,0 +1,65 @@ +struct ParametricResNetLayer{M, N, F1 <: Activation, PT, ReturnParameters} <: AbstractExplicitLayer{M, N} + width::Int + activation::F1 + parameter_length::Int + parameter_convert::PT +end + +function ParametricResNetLayer(dim::Integer, width::Integer, activation=identity; parameters::OptionalParameters=NullParameters(), return_parameters::Bool) + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + _activation = Activation(activation) + ParametricResNetLayer{dim, dim, typeof(_activation), typeof(flattened_parameters[2]), return_parameters}(width, _activation, parameter_length, flattened_parameters[2]) +end + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, l::ParametricResNetLayer{M, M}, backend::KernelAbstractions.Backend, ::Type{T}; init_bias = ZeroInitializer()) where {M, T} + upscale_weight = KernelAbstractions.allocate(backend, T, l.width, M + l.parameter_length) + upscale_bias = KernelAbstractions.allocate(backend, T, l.width) + downscale_weight = KernelAbstractions.allocate(backend, T, M, l.width) + bias = KernelAbstractions.allocate(backend, T, M) + init_weight(rng, upscale_weight) + init_weight(rng, downscale_weight) + init_bias(rng, upscale_bias) + init_bias(rng, bias) + (upscale_weight=upscale_weight, downscale_weight=downscale_weight, upscale_bias=upscale_bias, bias=bias) +end + +parameterlength(l::ParametricResNetLayer{M, M}) where {M} = (l.width + l.parameter_length) * (M + 1) + M * (l.width + 1) + +function (d::ParametricResNetLayer{M, M, F, PT, false})(x::AbstractVecOrMat, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + input = concatenate_array_with_parameters(x, problem_params) + x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * input .+ ps.upscale_bias) .+ ps.bias) +end + +function (d::ParametricResNetLayer{M, M, F, PT, true})(x::AbstractVecOrMat, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + input = concatenate_array_with_parameters(x, problem_params) + (x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * input .+ ps.upscale_bias) .+ ps.bias), problem_params) +end + +# function (d::ParametricResNetLayer{M, M, F, PT, false})(x::AbstractArray{T, 3}, problem_params::AbstractVector, ps::NamedTuple) where {M, F, PT, T} +# input = concatenate_array_with_parameters(x, problem_params) +# x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias) +# end +# +# function (d::ParametricResNetLayer{M, M, F, PT, true})(x::AbstractArray{T, 3}, problem_params::AbstractVector, ps::NamedTuple) where {M, F, PT, T} +# input = concatenate_array_with_parameters(x, problem_params) +# (x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias), problem_params) +# end + +(d::ParametricResNetLayer)(input::Tuple, ps::NamedTuple) = length(input) == 2 ? d(input..., ps) : error("The tuple must contain the input array/nt as well as the system parameters.") + +function (d::ParametricResNetLayer{M, M, F, PT, false})(z::QPT, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), problem_params, ps) + assign_q_and_p(output, N2) +end + +function (d::ParametricResNetLayer{M, M, F, PT, true})(z::QPT, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), problem_params, ps) + (assign_q_and_p(output[1], N2), problem_params) +end \ No newline at end of file diff --git a/src/layers/sympnets.jl b/src/layers/sympnets.jl index ba6e2c15..6187b7b2 100644 --- a/src/layers/sympnets.jl +++ b/src/layers/sympnets.jl @@ -236,36 +236,36 @@ function custom_vec_mul(scale::AbstractVector{T}, x::AbstractArray{T, 3}) where vec_tensor_mul(scale, x) end -@inline function (d::ActivationLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::ActivationLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") return (q = x.q + custom_vec_mul(ps.scale, d.activation.(x.p)), p = x.p) end -@inline function (d::ActivationLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::ActivationLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") return (q = x.q, p = x.p + custom_vec_mul(ps.scale, d.activation.(x.q))) end -@inline function (d::GradientLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::GradientLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q + custom_mat_mul(ps.weight', (custom_vec_mul(ps.scale, d.activation.(custom_mat_mul(ps.weight, x.p) .+ ps.bias)))), p = x.p) end -@inline function(d::GradientLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::GradientLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q, p = x.p + custom_mat_mul(ps.weight', (custom_vec_mul(ps.scale, d.activation.(custom_mat_mul(ps.weight, x.q) .+ ps.bias))))) end -@inline function(d::LinearLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::LinearLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q + custom_mat_mul(ps.weight, x.p), p = x.p) end -@inline function(d::LinearLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::LinearLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q, p = x.p + custom_mat_mul(ps.weight, x.q)) end diff --git a/src/layers/wide_resnet.jl b/src/layers/wide_resnet.jl new file mode 100644 index 00000000..f1a4c468 --- /dev/null +++ b/src/layers/wide_resnet.jl @@ -0,0 +1,32 @@ +struct WideResNetLayer{M, N, F1} <: AbstractExplicitLayer{M, N} + width::Int + activation::F1 +end + +WideResNetLayer(dim::Integer, width::Integer, activation=identity) = WideResNetLayer{dim, dim, typeof(activation)}(width, activation) + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, l::WideResNetLayer{M, M}, backend::KernelAbstractions.Backend, ::Type{T}; init_bias = ZeroInitializer()) where {M, T} + upscale_weight = KernelAbstractions.allocate(backend, T, l.width, M) + upscale_bias = KernelAbstractions.allocate(backend, T, l.width) + downscale_weight = KernelAbstractions.allocate(backend, T, M, l.width) + bias = KernelAbstractions.allocate(backend, T, M) + init_weight(rng, upscale_weight) + init_weight(rng, downscale_weight) + init_bias(rng, upscale_bias) + init_bias(rng, bias) + (upscale_weight=upscale_weight, downscale_weight=downscale_weight, upscale_bias=upscale_bias, bias=bias) +end + +parameterlength(l::WideResNetLayer{M, M}) where {M} = l.width * (M + 1) + M * (l.width + 1) + +(d::WideResNetLayer{M, M})(x::AbstractVecOrMat, ps::NamedTuple) where {M} = x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * x .+ ps.upscale_bias) .+ ps.bias) + +(d::WideResNetLayer{M, M})(x::AbstractArray{T, 3}, ps::NamedTuple) where {M, T} = x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias) + +function (d::WideResNetLayer{M, M})(z::QPT, ps::NamedTuple) where {M} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), ps) + assign_q_and_p(output, N2) +end \ No newline at end of file diff --git a/src/loss/losses.jl b/src/loss/losses.jl index 93764f99..e72b5218 100644 --- a/src/loss/losses.jl +++ b/src/loss/losses.jl @@ -226,4 +226,10 @@ end function (loss::ReducedLoss)(model::Chain, params::NeuralNetworkParameters, input::CT, output::CT) where {CT <: QPTOAT} _compute_loss(loss.decoder(model(loss.encoder(input), params)), output) +end + +struct ParametricLoss <: NetworkLoss end + +function (loss::ParametricLoss)(model::Chain, params::Union{NamedTuple, NeuralNetworkParameters}, input::CT, output::CT, system_parameters::Union{NamedTuple, AbstractVector}) where {CT <: QPTOAT} + _compute_loss(model(input, system_parameters, params), output) end \ No newline at end of file diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 1395ce42..f81bbc68 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -86,7 +86,7 @@ end ####################################################################################### # optimization step function -function _optimization_step!(o::Optimizer, λY::NamedTuple, ps::NamedTuple, cache::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) +function _optimization_step!(o::Optimizer, λY::NamedTuple, ps::Union{NeuralNetworkParameters, NamedTuple}, cache::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) gx = rgrad(ps, dx) B = global_rep(λY, gx) update!(o, cache, B) @@ -162,6 +162,21 @@ end rgrad(ps::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) = apply_toNT(rgrad, ps, dx) +rgrad(a::AbstractArray, ::Nothing) = rgrad(a, zero(a)) + +rgrad(nt::NamedTuple, ::Nothing) = rgrad(nt, apply_toNT(zero, nt)) + +_get_params_without_warning(nt) = _get_params(nt) +_get_params_without_warning(nt::NamedTuple{(:params,), Tuple{AT}}) where {AT <: Any} = nt.params + +function rgrad(ps::NeuralNetworkParameters, dp::NamedTuple) + rgrad(NamedTuple{keys(ps)}(values(ps)), _get_params_without_warning(dp)) +end + +function rgrad(ps::NeuralNetworkParameters, dp::NeuralNetworkParameters) + rgrad(ps, NamedTuple{keys(dp)}(values(dp))) +end + function rgrad(Y::AbstractVecOrMat, dx::AbstractVecOrMat) @assert size(Y) == size(dx) dx diff --git a/src/pullbacks/symbolic_hnn_pullback.jl b/src/pullbacks/symbolic_hnn_pullback.jl index b0899e03..a514676c 100644 --- a/src/pullbacks/symbolic_hnn_pullback.jl +++ b/src/pullbacks/symbolic_hnn_pullback.jl @@ -11,4 +11,90 @@ function SymbolicPullback(arch::HamiltonianArchitecture) nn = SymbolicNeuralNetwork(arch) loss = HNNLoss(arch) SymbolicPullback(nn, loss) +end + +# TODO: This constitutes type piracy - should be in `SymbolicNeuralNetworks`! +# TODO: note that the the whole concatenation business may not be the ideal solution, but implementing a separate build_nn_function for any number of input arguments isn't either. Find something better here!! +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:Union{AbstractVector, NamedTuple}})::Tuple + input, output, system_params = input_and_parameters + _pullback.loss(model, ps, concatenate_array_with_parameters(input, system_params), output), _pullback.fun(input, output, system_params, ps) +end + +function assign_q_and_p(z::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}, N::Int) + @assert length(z) == 2N + (q = z[1:N], p = z[(N+1):2N]) +end + +function SymbolicNeuralNetworks.Symbolics.SymbolicUtils.Code.create_array(::Type{<:SymbolicNeuralNetworks.Symbolics.Arr}, ::Nothing, ::Val, ::Val{dims}, elems...) where {dims} + SymbolicNeuralNetworks.Symbolics.Arr([elems...]) +end + +function ParameterHandling.flatten(::Type{T}, ps::NeuralNetworkParameters) where {T<:Real} + _ps = NamedTuple{keys(ps)}(values(ps)) + x_vec, unflatten_to_NamedTuple = ParameterHandling.flatten(T, _ps) + function unflatten_to_NeuralNetworkParameters(v::Vector{T}) + nt = unflatten_to_NamedTuple(v) + NeuralNetworkParameters{keys(nt)}(values(nt)) + end + x_vec, unflatten_to_NeuralNetworkParameters +end +function ParameterHandling.flatten(T::Type{SymbolicNeuralNetworks.Symbolics.Num}, v::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}) + ParameterHandling.flatten(T, [elem for elem ∈ v]) +end + +""" + semi_flatten_network_parameters(params) + +Should be used together with [`GeneralizedHamiltonianArchitecture`](@ref) and `SymbolicPullback`s. +""" +function semi_flatten_network_parameters(::Type{T}, params::NeuralNetworkParameters) where {T} + _values = Tuple(ParameterHandling.flatten(T, value)[1] for value ∈ values(params)) + NeuralNetworkParameters{keys(params)}(_values) +end + +""" + SymbolicPullback(nn, loss, system_params) + +The `SymbolicPullback` for the case when we have a parametrized system (with `system_params`). +""" +function SymbolicPullback(nn::NeuralNetwork, loss::ParametricLoss, system_params::OptionalParameters) + cache = Dict() + symbolic_system_parameters = SymbolicNeuralNetworks.symbolize!(cache, system_params, :S) + cache = Dict() + symbolic_network_parameters = SymbolicNeuralNetworks.symbolize!(cache, nn.params, :W) + + input_dim = input_dimension(nn.model) + output_dim = output_dimension(nn.model) + flattened_params = ParameterHandling.flatten(SymbolicNeuralNetworks.Symbolics.Num, symbolic_system_parameters) + SymbolicNeuralNetworks.Symbolics.@variables sinput[1:(input_dim + length(system_params))] + SymbolicNeuralNetworks.Symbolics.@variables soutput[1:output_dim] + symbolic_loss = loss(nn.model, symbolic_network_parameters, sinput[1:input_dim], soutput, flattened_params[2]([elem for elem ∈ sinput[(input_dim+1):end]])) + symbolic_diffs = SymbolicNeuralNetworks.symbolic_differentials(symbolic_network_parameters) + symbolic_pullbacks = [SymbolicNeuralNetworks.symbolic_derivative(f_single, symbolic_diffs) for f_single ∈ symbolic_loss] + + semi_flattened_network_parameters = semi_flatten_network_parameters(SymbolicNeuralNetworks.Symbolics.Num, symbolic_network_parameters) + pbs_executable = SymbolicNeuralNetworks.build_nn_function(symbolic_pullbacks, semi_flattened_network_parameters, sinput, soutput) + function pbs(input, output, params) + T = (typeof(input) <: QPT) ? eltype(input.q) : eltype(input) + pullback(::Union{Real, AbstractArray{<:Real}}) = _get_contents(_get_params(pbs_executable(input, output, semi_flatten_network_parameters(T, params)))) + pullback + end + SymbolicPullback(loss, pbs) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:AbstractArray, <:AbstractArray, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (concatenate_array_with_parameters(input, system_params), output)) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{AT, AT, <:Union{NamedTuple, AbstractVector}})::Tuple where {T, AT <: AbstractArray{T, 3}} + input, output, system_params = input_and_parameters + _input = reshape(input, size(input, 1), size(input, 2) * size(input, 3)) + _output = reshape(output, size(output, 1), size(output, 2) * size(output, 3)) + _pullback.loss(model, ps, _input, _output, system_params), _pullback.fun(concatenate_array_with_parameters(_input, system_params), _output, ps) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPT, <:QPT, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (vcat(input.q, input.p), vcat(output.q, output.p), system_params)) end \ No newline at end of file diff --git a/src/pullbacks/zygote_pullback.jl b/src/pullbacks/zygote_pullback.jl index e33e081d..30776722 100644 --- a/src/pullbacks/zygote_pullback.jl +++ b/src/pullbacks/zygote_pullback.jl @@ -27,8 +27,22 @@ struct ZygotePullback{NNLT} <: AbstractPullback{NNLT} loss::NNLT end -(_pullback::ZygotePullback)(ps, model, input_nt::QPTOAT)::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt), ps) -(_pullback::ZygotePullback)(ps, model, input_nt_output_nt::Tuple{<:QPTOAT, <:QPTOAT})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt_output_nt...), ps) +function (_pullback::ZygotePullback)(ps, model, input_nt::QPTOAT)::Tuple + closure = ps -> _pullback.loss(model, ps, input_nt) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_nt_output_nt::Tuple{<:QPTOAT, <:QPTOAT})::Tuple + closure = ps -> _pullback.loss(model, ps, input_nt_output_nt...) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:NamedTuple})::Tuple + closure = ps -> _pullback.loss(model, ps, input_and_parameters...) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:AbstractVector})::Tuple + closure = ps -> _pullback.loss(model, ps, input_and_parameters...) + Zygote.pullback(closure, ps) +end """ _processing(returned_pullback) diff --git a/src/training_method/symplectic_euler.jl b/src/training_method/symplectic_euler.jl index fab5de86..ea06c5b3 100644 --- a/src/training_method/symplectic_euler.jl +++ b/src/training_method/symplectic_euler.jl @@ -1,27 +1,27 @@ -abstract type SymplecticEuler <: HnnTrainingMethod end +abstract type SymplecticEulerIntegrator <: HnnTrainingMethod end -struct SymplecticEulerA <: SymplecticEuler end -struct SymplecticEulerB <: SymplecticEuler end +struct SymplecticEulerIntegratorA <: SymplecticEulerIntegrator end +struct SymplecticEulerIntegratorB <: SymplecticEulerIntegrator end SEuler(;sqdist = sqeuclidean) = SEulerA(sqdist = sqdist) -SEulerA(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerA, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) -SEulerB(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerB, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) +SEulerA(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerIntegratorA, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) +SEulerB(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerIntegratorB, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) -function loss_single(::TrainingMethod{SymplecticEulerA}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) +function loss_single(::TrainingMethod{SymplecticEulerIntegratorA}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) dH = vectorfield(nn, [qₙ₊₁...,pₙ...], params) sqeuclidean(dH[1],(qₙ₊₁-qₙ)/Δt) + sqeuclidean(dH[2],(pₙ₊₁-pₙ)/Δt) end -function loss_single(::TrainingMethod{SymplecticEulerB}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) +function loss_single(::TrainingMethod{SymplecticEulerIntegratorB}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) dH = vectorfield(nn, [qₙ...,pₙ₊₁...], params) sqeuclidean(dH[1],(qₙ₊₁-qₙ)/Δt) + sqeuclidean(dH[2],(pₙ₊₁-pₙ)/Δt) end -get_loss(::TrainingMethod{<:SymplecticEuler}, ::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, args) = (get_data(data,:q, args...), get_data(data,:q, next(args...)...), get_data(data,:p, args...), get_data(data,:p,next(args...)...), get_Δt(data)) +get_loss(::TrainingMethod{<:SymplecticEulerIntegrator}, ::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, args) = (get_data(data,:q, args...), get_data(data,:q, next(args...)...), get_data(data,:p, args...), get_data(data,:p,next(args...)...), get_Δt(data)) -loss(ti::TrainingMethod{<:SymplecticEuler}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, index_batch = eachindex(ti, data), params = params(nn)) = +loss(ti::TrainingMethod{<:SymplecticEulerIntegrator}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, index_batch = eachindex(ti, data), params = params(nn)) = mapreduce(args->loss_single(Zygote.ignore_derivatives(ti), nn, get_loss(ti, nn, data, args)..., params),+, index_batch) -min_length_batch(::SymplecticEuler) = 2 \ No newline at end of file +min_length_batch(::SymplecticEulerIntegrator) = 2 \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index 0fe9b3b5..78c068ef 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -151,7 +151,9 @@ qp = (q = [1, 2], p = [3, 4]) ``` """ -const QPT{T} = NamedTuple{(:q, :p), Tuple{AT, AT}} where {T, AT <: AbstractArray{T}} +const QPT{T} = NamedTuple{(:q, :p), Tuple{AT, AT}} where {T, N, AT <: AbstractArray{T, N}} + +const QPT2{T, N} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: AbstractArray{T, N}} @doc raw""" QPTOAT @@ -163,10 +165,42 @@ const QPTOAT = Union{QPT, AbstractArray} This could be data in ``(q, p)\in\mathbb{R}^{2d}`` form or come from an arbitrary vector space. """ -const QPTOAT{T} = Union{QPT{T}, AbstractArray{T}} where T +const QPTOAT{T} = Union{QPT{T}, AbstractArray{T}} where {T} + +const QPTOAT2{T, N} = Union{QPT2{T, N}, AbstractArray{T, N}} where {T, N} Base.:≈(qp₁::QPT, qp₂::QPT) = (qp₁.q ≈ qp₂.q) & (qp₁.p ≈ qp₂.p) _eltype(x) = eltype(x) -_eltype(ps::NamedTuple) = _eltype(ps[1]) -_eltype(ps::Tuple) = _eltype(ps[1]) \ No newline at end of file +_eltype(::NTuple{N, FT}) where {N, FT <: AbstractFloat} = FT +_eltype(nt::NamedTuple) = _eltype(values(nt)) + +_size(x) = size(x) +function _size(qp::QPT) + q_size = _size(qp.q) + p_size = _size(qp.p) + @assert q_size == p_size + (2q_size[1], q_size[2:end]...) +end + +_size(x, a::Integer) = size(x, a) +function _size(qp::QPT, a::Integer) + q_size = _size(qp.q, a) + p_size = _size(qp.p, a) + @assert q_size == p_size + a == 1 ? 2q_size : q_size +end + +function _eltype(parameters::Vector{<:NamedTuple}) + types = DataType[] + for v ∈ parameters + push!(types, _eltype(v)) + end + T = types[1] + for T₂ ∈ types + T == T₂ || error("There are different types in the NamedTuples!") + end + T +end + +end_eltype(ps::Tuple) = _eltype(ps[1]) \ No newline at end of file diff --git a/test/data_loader/parametric_data_loader_test.jl b/test/data_loader/parametric_data_loader_test.jl new file mode 100644 index 00000000..572bb0bb --- /dev/null +++ b/test/data_loader/parametric_data_loader_test.jl @@ -0,0 +1,35 @@ +using GeometricMachineLearning +using GeometricMachineLearning: convert_input_and_batch_indices_to_array +using Test +using GeometricProblems.CoupledHarmonicOscillator: hodeensemble, default_parameters +using GeometricIntegrators: ImplicitMidpoint, integrate +using Random: seed! +seed!(123) + +function make_alternative_parameters_by_adding_constant(params::NamedTuple=default_parameters, a::Number=1.) + _keys = keys(params) + values = () + for key in _keys + values = (values..., params[key] .+ a) + end + NamedTuple{_keys}(values) +end + +alternative_parameters = make_alternative_parameters_by_adding_constant() + +h_ensemble = hodeensemble(; parameters = [default_parameters, alternative_parameters]) +sol = integrate(h_ensemble, ImplicitMidpoint()) +dl = ParametricDataLoader(sol) +batch = Batch(2) +batch_indices = batch(dl) + +function test_correct_parameter_splitting(n::Integer, parameters₁ = default_parameters, parameters₂ = alternative_parameters) + data_in_batch = convert_input_and_batch_indices_to_array(dl, batch, batch_indices[n]) + # the third output and the second datum in the batch + @test data_in_batch[3][1] == parameters₁ + @test data_in_batch[3][1] != parameters₂ + nothing +end + +test_correct_parameter_splitting(250, default_parameters, alternative_parameters) +test_correct_parameter_splitting(101, alternative_parameters, default_parameters) \ No newline at end of file diff --git a/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl b/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl new file mode 100644 index 00000000..2a87fc0e --- /dev/null +++ b/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl @@ -0,0 +1,66 @@ +using GeometricMachineLearning +using GeometricMachineLearning: Activation, SymplecticEulerB, SymplecticPotentialEnergy, SymbolicNeuralNetwork, ParametricLoss, QPT + +params, dim, width, nhidden, activation = (m = 1., ω = π / 2), 2, 2, 1, tanh + +se = GeometricMachineLearning.SymbolicPotentialEnergy(dim, width, nhidden, activation; parameters = params) +l = SymplecticEulerB(se; return_parameters=false) +c = Chain(l) +nn = NeuralNetwork(c) + +cache = Dict() +symbolic_network_parameters = GeometricMachineLearning.SymbolicNeuralNetworks.symbolize!(cache, nn.params, :W) +cache2 = Dict() +symbolic_system_parameters = GeometricMachineLearning.SymbolicNeuralNetworks.symbolize!(cache2, params, :S) +loss = ParametricLoss() + +function GeometricMachineLearning.ParameterHandling.flatten(::Type{T}, ps::NeuralNetworkParameters) where {T<:Real} + _ps = NamedTuple{keys(ps)}(values(ps)) + x_vec, unflatten_to_NamedTuple = GeometricMachineLearning.ParameterHandling.flatten(T, _ps) + function unflatten_to_NeuralNetworkParameters(v::Vector{T}) + nt = unflatten_to_NamedTuple(v) + NeuralNetworkParameters{keys(nt)}(values(nt)) + end + x_vec, unflatten_to_NeuralNetworkParameters +end +function GeometricMachineLearning.ParameterHandling.flatten(T::Type{GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num}, v::GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Arr{GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, 1}) + GeometricMachineLearning.ParameterHandling.flatten(T, [elem for elem ∈ v]) +end + +flattened_params = GeometricMachineLearning.ParameterHandling.flatten(GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, symbolic_system_parameters) +GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.@variables sinput[1:(dim + length(params))] +GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.@variables soutput[1:GeometricMachineLearning.output_dimension(nn.model)] +# gen_fun = GeometricMachineLearning.SymbolicNeuralNetworks._build_nn_function(symbolic_pullbacks[1].L1.L1.W, snn.params, GeometricMachineLearning.concatenate_array_with_parameters(snn.input, symbolic_system_parameters), soutput) +symbolic_loss = loss(nn.model, symbolic_network_parameters, sinput[1:dim], soutput, flattened_params[2]([elem for elem ∈ sinput[(dim+1):end]])) +symbolic_diffs = GeometricMachineLearning.SymbolicNeuralNetworks.symbolic_differentials(symbolic_network_parameters) +symbolic_pullbacks = [GeometricMachineLearning.SymbolicNeuralNetworks.symbolic_derivative(f_single, symbolic_diffs) for f_single ∈ symbolic_loss] + +""" + semi_flatten_network_parameters(params) + +Should be used together with [`GeneralizedHamiltonianArchitecture`](@ref) and `SymbolicPullback`s. +""" +function semi_flatten_network_parameters(::Type{T}, params::NeuralNetworkParameters) where {T} + _values = Tuple(GeometricMachineLearning.ParameterHandling.flatten(T, value)[1] for value ∈ values(params)) + NeuralNetworkParameters{keys(params)}(_values) +end + +semi_flattened_network_parameters = semi_flatten_network_parameters(GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, symbolic_network_parameters) +pbs_executable = GeometricMachineLearning.SymbolicNeuralNetworks.build_nn_function(symbolic_pullbacks, semi_flattened_network_parameters, sinput, soutput) +function pbs(input, output, params) + T = eltype(input) + pullback(::Union{Real, AbstractArray{<:Real}}) = GeometricMachineLearning._get_contents(GeometricMachineLearning._get_params(pbs_executable(input, output, semi_flatten_network_parameters(T, params)))) + pullback +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:AbstractArray, <:AbstractArray, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (concatenate_array_with_parameters(input, system_params), output)) +end +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPT, <:QPT, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (vcat(input.q, input.p), vcat(output.q, output.p), system_params)) +end + +_pullback = GeometricMachineLearning.SymbolicNeuralNetworks.SymbolicPullback(loss, pbs) +_pullback.fun(rand(4, 10), rand(2, 10), nn.params)(1.) \ No newline at end of file diff --git a/test/generalized_hamiltonian_neural_networks_test.jl b/test/generalized_hamiltonian_neural_networks_test.jl new file mode 100644 index 00000000..cdd4030d --- /dev/null +++ b/test/generalized_hamiltonian_neural_networks_test.jl @@ -0,0 +1,25 @@ +using GeometricMachineLearning +using GeometricMachineLearning: OptionalParameters, OneInitializer +using GeometricProblems.HarmonicOscillator: odeproblem, default_parameters +using GeometricIntegrators +using Test + +sol = integrate(odeproblem(), ImplicitMidpoint()) +dim = length(sol.problem.ics.q) + +dl = DataLoader(sol) + +function test_ghnn_without_parameters(dim::Integer = dim) + arch = GeneralizedHamiltonianArchitecture(dim) + nn = NeuralNetwork(arch; initializer=OneInitializer()) + @test nn([1., 1.]) ≈ [1.003217200759985, 0.9968055760434815] +end + +function test_ghnn_with_parameters(dim::Integer = dim, parameters::OptionalParameters = default_parameters) + arch = GeneralizedHamiltonianArchitecture(dim, parameters = parameters) + nn = NeuralNetwork(arch; initializer=OneInitializer()) + @test nn([1., 1.], parameters) ≈ [1.0000350420844089, 0.9999649603746436] +end + +test_ghnn_without_parameters() +test_ghnn_with_parameters() \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7f0bd48f..8cd93ffb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ using Documenter: doctest @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end @safetestset "Hamiltonian Neural Network " begin include("hamiltonian_neural_network_tests.jl") end +@safetestset "Generalized Hamiltonian Neural Network " begin include("generalized_hamiltonian_neural_networks_test.jl") end @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom tensor matrix multiplication " begin include("kernels/tensor_mat_mul.jl") end @@ -61,6 +62,7 @@ using Documenter: doctest @safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end @safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") end @safetestset "Test data loader for a tensor (q and p data) " begin include("data_loader/draw_batch_for_tensor_test.jl") end +@safetestset "Parametric DataLoader " begin include("data_loader/parametric_data_loader_test.jl") end @safetestset "Test NetworkLoss + Optimizer " begin include("network_losses/losses_and_optimization.jl") end @@ -80,4 +82,6 @@ using Documenter: doctest @safetestset "Linear Symplectic Attention " begin include("linear_symplectic_attention.jl") end @safetestset "Linear Symplectic Transformer " begin include("linear_symplectic_transformer.jl") end -@safetestset "DataLoader for input and output " begin include("data_loader/data_loader_for_input_and_output.jl") end \ No newline at end of file +@safetestset "DataLoader for input and output " begin include("data_loader/data_loader_for_input_and_output.jl") end + +@safetestset "Test symbolic pullback for PGHNN with single layer " begin include("generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl") end \ No newline at end of file diff --git a/test/train!/test_method.jl b/test/train!/test_method.jl index fbce31e8..4df65d03 100644 --- a/test/train!/test_method.jl +++ b/test/train!/test_method.jl @@ -24,7 +24,7 @@ exacthnn = ExactHnn() sympeuler = SEuler() -@test GeometricMachineLearning.type(sympeuler) == SymplecticEulerA +@test GeometricMachineLearning.type(sympeuler) == SymplecticEulerIntegratorA @test symbols(sympeuler) == PhaseSpaceSymbol @test shape(sympeuler) == TrajectoryData @test min_length_batch(sympeuler) == 2 @@ -64,7 +64,7 @@ midpointlnn = VariaMidPoint() ######################################### @testerror GeometricMachineLearning.type(default_Method(sympnet, tra_pos_data)) -@test GeometricMachineLearning.type(default_method(hnn, tra_ps_data)) == SymplecticEulerA +@test GeometricMachineLearning.type(default_method(hnn, tra_ps_data)) == SymplecticEulerIntegratorA @test GeometricMachineLearning.type(default_method(hnn, sam_dps_data)) == HnnExactMethod @test GeometricMachineLearning.type(default_method(sympnet, tra_ps_data)) == BasicSympNetMethod @test GeometricMachineLearning.type(default_method(lnn, tra_pos_data)) == VariationalMidPointMethod diff --git a/test/training_phnn.jl b/test/training_phnn.jl index 4f30fda1..4b5098e6 100644 --- a/test/training_phnn.jl +++ b/test/training_phnn.jl @@ -27,4 +27,4 @@ batch = Batch(100) arch = GeneralizedHamiltonianArchitecture(4; parameters = default_parameters) nn = NeuralNetwork(arch) o = Optimizer(AdamOptimizer(), nn) -o(nn, dl, batch) \ No newline at end of file +o(nn, dl, batch)