From 4bc3118d1c84f2f366e1a0862f234d82b9826a12 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 13 Jul 2025 10:37:33 +0200 Subject: [PATCH 01/50] (i) renamed hamiltonian_neural_network.jl to standard_hamiltonian_neural_network.jl (ii) created generalized_hamiltonian_neural_network where I defined energy functions (iii) moved basic functionality form standard_hamiltonian_neural_network.jl to new file hamiltonian_neural_network.jl. --- src/GeometricMachineLearning.jl | 2 + .../generalized_hamiltonian_neural_network.jl | 75 ++++++++++++ .../hamiltonian_neural_network.jl | 108 ------------------ .../standard_hamiltonian_neural_network.jl | 77 +++++++++++++ 4 files changed, 154 insertions(+), 108 deletions(-) create mode 100644 src/architectures/generalized_hamiltonian_neural_network.jl create mode 100644 src/architectures/standard_hamiltonian_neural_network.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index bdc2d8e1..ff7ff0bf 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -265,6 +265,8 @@ module GeometricMachineLearning include("architectures/psd.jl") include("architectures/fixed_width_network.jl") include("architectures/hamiltonian_neural_network.jl") + include("architectures/standard_hamiltonian_neural_network.jl") + include("architectures/generalized_hamiltonian_neural_network.jl") include("architectures/lagrangian_neural_network.jl") include("architectures/variable_width_network.jl") include("architectures/recurrent_neural_network.jl") diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl new file mode 100644 index 00000000..fd220400 --- /dev/null +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -0,0 +1,75 @@ +""" + SymbolicEnergy + +See [`SymbolicPotentialEnergy`](@ref) and [`SymbolicKineticEnergy`](@ref). +""" +struct SymbolicEnergy{AT <: Activation, PT <: OptionalParameters, Kinetic} + dim::Int + width::Int + nhidden::Int + activation::AT + parameters::PT + + function SymbolicEnergy{Kinetic}(dim, width=dim÷2, nhidden=HNN_nhidden_default, activation=HNN_activation_default; parameters::PT=NullParameters()) where {Kinetic, PT} + @assert iseven(dim) "The input dimension must be an even integer!" + new{typeof(activation), PT, Kinetic}(dim ÷ 2, width, nhidden, activation, parameters) + end +end + +""" + SymbolicPotentialEnergy + +# Constructors + +```julia +SymbolicPotentialEnergy(dim) +``` + +# Parameter Dependence +""" +const SymbolicPotentialEnergy{AT} = SymbolicEnergy{AT, :potential} + +""" + SymbolicKineticEnergy + +# Constructors + +```julia + +``` +""" +const SymbolicKineticEnergy{AT} = SymbolicEnergy{AT, :kinetic} + +SymbolicPotentialEnergy(args...; kwargs...) = SymbolicEnergy{:potential}(args...; kwargs...) +SymbolicKineticEnergy(args...; kwargs...) = KineticEnergy{:kinetic}(args...; kwargs...) + +GHNN_integrator_default = nothing + +""" + GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture + +A realization of generalized Hamiltonian neural networks (GHNNs) as introduced in [horn2025generalized](@cite). + +Also see [`StandardHamiltonianArchitecture`](@ref). + +# Constructor + +The constructor takes the following input arguments: +1. `dim`: system dimension, +2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, +3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, +4. `activation = $(HNN_activation_default)`: the activation function used in the GHNN, +5. 
`integrator = $(GHNN_integrator_default)`: the integrator that is used to design the GHNN. +""" +struct GeneralizedHamiltonianArchitecture{AT, IT} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + activation::AT + integrator::IT + + function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default, integrator=GHNN_integrator_default) + error("GHNN still has to be implemented!") + new{typeof(activation), typeof(integrator)}(dim, width, nhidden, activation, integrator) + end +end \ No newline at end of file diff --git a/src/architectures/hamiltonian_neural_network.jl b/src/architectures/hamiltonian_neural_network.jl index b3077952..00c85405 100644 --- a/src/architectures/hamiltonian_neural_network.jl +++ b/src/architectures/hamiltonian_neural_network.jl @@ -12,111 +12,3 @@ function HamiltonianArchitecture(dim::Integer, width::Integer, nhidden::Integer, @warn "You called the abstract type `HamiltonianArchitecture` as a constructor. This is defaulting to `StandardHamiltonianArchitecture`." StandardHamiltonianArchitecture(dim, width, nhidden, activation) end - -""" - StandardHamiltonianArchitecture <: HamiltonianArchitecture - -A realization of the standard Hamiltonian neural network (HNN) [greydanus2019hamiltonian](@cite). - -Also see [`GeneralizedHamiltonianArchitecture`](@ref). - -# Constructor - -The constructor takes the following input arguments: -1. `dim`: system dimension, -2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, -3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, -4. `activation = $(HNN_activation_default)`: the activation function used in the HNN. -""" -struct StandardHamiltonianArchitecture{AT} <: HamiltonianArchitecture{AT} - dim::Int - width::Int - nhidden::Int - activation::AT - - function StandardHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default) - new{typeof(activation)}(dim, width, nhidden, activation) - end -end - -GHNN_integrator_default = nothing - -""" - GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture - -A realization of generalized Hamiltonian neural networks (GHNNs) as introduced in [horn2025generalized](@cite). - -Also see [`StandardHamiltonianArchitecture`](@ref). - -# Constructor - -The constructor takes the following input arguments: -1. `dim`: system dimension, -2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, -3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, -4. `activation = $(HNN_activation_default)`: the activation function used in the GHNN, -5. `integrator = $(GHNN_integrator_default)`: the integrator that is used to design the GHNN. -""" -struct GeneralizedHamiltonianArchitecture{AT, IT} <: HamiltonianArchitecture{AT} - dim::Int - width::Int - nhidden::Int - activation::AT - integrator::IT - - function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default, integrator=GHNN_integrator_default) - error("GHNN still has to be implemented!") - new{typeof(activation), typeof(integrator)}(dim, width, nhidden, activation, integrator) - end -end - -@inline AbstractNeuralNetworks.dim(arch::HamiltonianArchitecture) = arch.dim - -""" - symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) - -Get the symbolic expression for the vector field belonging to the HNN `nn`. 
- -# Implementation - -This is calling `SymbolicNeuralNetworks.Jacobian` and then multiplies the result with a Poisson tensor. -""" -function symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) - □ = SymbolicNeuralNetworks.Jacobian(nn) - input_dim = input_dimension(nn.model) - n = input_dim ÷ 2 - # placeholder for one - @variables o - o_vec = repeat([o], n) - 𝕀 = Diagonal(o_vec) - 𝕆 = zero(𝕀) - 𝕁 = hcat(vcat(𝕆, -𝕀), vcat(𝕀, 𝕆)) - substitute(𝕁 * derivative(□)', Dict(o => 1, )) -end - -""" - hamiltonian_vector_field(arch::HamiltonianArchitecture) - -Compute an executable expression of the Hamiltonian vector field of a [`HamiltonianArchitecture`](@ref). - -# Implementation - -This first computes a symbolic expression of the vector field using [`symbolic_hamiltonian_vector_field`](@ref). -""" -function hamiltonian_vector_field(arch::HamiltonianArchitecture) - nn = SymbolicNeuralNetwork(arch) - hvf = symbolic_hamiltonian_vector_field(nn) - SymbolicNeuralNetworks.build_nn_function(hvf, nn.params, nn.input) -end - -function Chain(arch::HamiltonianArchitecture) - inner_layers = Tuple( - [Dense(arch.width, arch.width, arch.activation) for _ in 1:arch.nhidden] - ) - - Chain( - Dense(arch.dim, arch.width, arch.activation), - inner_layers..., - Linear(arch.width, 1; use_bias = false) - ) -end \ No newline at end of file diff --git a/src/architectures/standard_hamiltonian_neural_network.jl b/src/architectures/standard_hamiltonian_neural_network.jl new file mode 100644 index 00000000..5d70304d --- /dev/null +++ b/src/architectures/standard_hamiltonian_neural_network.jl @@ -0,0 +1,77 @@ +""" + StandardHamiltonianArchitecture <: HamiltonianArchitecture + +A realization of the standard Hamiltonian neural network (HNN) [greydanus2019hamiltonian](@cite). + +Also see [`GeneralizedHamiltonianArchitecture`](@ref). + +# Constructor + +The constructor takes the following input arguments: +1. `dim`: system dimension, +2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, +3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, +4. `activation = $(HNN_activation_default)`: the activation function used in the HNN. +""" +struct StandardHamiltonianArchitecture{AT} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + activation::AT + + function StandardHamiltonianArchitecture(dim::Integer, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default) + @assert iseven(dim) "The input dimension must be an even integer" + new{typeof(activation)}(dim, width, nhidden, activation) + end +end + +@inline AbstractNeuralNetworks.dim(arch::HamiltonianArchitecture) = arch.dim + +""" + symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) + +Get the symbolic expression for the vector field belonging to the HNN `nn`. + +# Implementation + +This is calling `SymbolicNeuralNetworks.Jacobian` and then multiplies the result with a Poisson tensor. +""" +function symbolic_hamiltonian_vector_field(nn::SymbolicNeuralNetwork) + □ = SymbolicNeuralNetworks.Jacobian(nn) + input_dim = input_dimension(nn.model) + n = input_dim ÷ 2 + # placeholder for one + @variables o + o_vec = repeat([o], n) + 𝕀 = Diagonal(o_vec) + 𝕆 = zero(𝕀) + 𝕁 = hcat(vcat(𝕆, -𝕀), vcat(𝕀, 𝕆)) + substitute(𝕁 * derivative(□)', Dict(o => 1, )) +end + +""" + hamiltonian_vector_field(arch::HamiltonianArchitecture) + +Compute an executable expression of the Hamiltonian vector field of a [`HamiltonianArchitecture`](@ref). 
+ +# Implementation + +This first computes a symbolic expression of the vector field using [`symbolic_hamiltonian_vector_field`](@ref). +""" +function hamiltonian_vector_field(arch::HamiltonianArchitecture) + nn = SymbolicNeuralNetwork(arch) + hvf = symbolic_hamiltonian_vector_field(nn) + SymbolicNeuralNetworks.build_nn_function(hvf, nn.params, nn.input) +end + +function Chain(arch::HamiltonianArchitecture) + inner_layers = Tuple( + [Dense(arch.width, arch.width, arch.activation) for _ in 1:arch.nhidden] + ) + + Chain( + Dense(arch.dim, arch.width, arch.activation), + inner_layers..., + Linear(arch.width, 1; use_bias = false) + ) +end \ No newline at end of file From 7992f2b173774af13cd55f55f5787e9b6144eecf Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 14 Jul 2025 10:51:17 +0200 Subject: [PATCH 02/50] Continued implementing PGHNNs. --- Project.toml | 2 + src/GeometricMachineLearning.jl | 2 + .../generalized_hamiltonian_neural_network.jl | 109 +++++++++++++++--- .../standard_hamiltonian_neural_network.jl | 4 +- ...alized_hamiltonian_neural_networks_test.jl | 10 ++ 5 files changed, 111 insertions(+), 16 deletions(-) create mode 100644 test/generalized_hamiltonian_neural_networks_test.jl diff --git a/Project.toml b/Project.toml index ec4b5cb7..4b99284f 100644 --- a/Project.toml +++ b/Project.toml @@ -20,6 +20,7 @@ KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" LazyArrays = "5078a376-72f3-5289-bfd5-ec5146d43c02" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd" +ParameterHandling = "2412ca09-6db7-441c-8e3a-88d5709968c5" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -48,6 +49,7 @@ HDF5 = "0.16, 0.17" KernelAbstractions = "0.9" LazyArrays = "=2.3.2" NNlib = "0.8, 0.9" +ParameterHandling = "0.5.0" ProgressMeter = "1" SafeTestsets = "0.1" StatsBase = "0.33, 0.34" diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index ff7ff0bf..55d149fc 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -22,6 +22,8 @@ module GeometricMachineLearning using SymbolicNeuralNetworks: derivative, _get_contents, _get_params, SymbolicNeuralNetwork using Symbolics: @variables, substitute + import ParameterHandling: flatten + import AbstractNeuralNetworks: Architecture, Model, AbstractExplicitLayer, AbstractExplicitCell, AbstractNeuralNetwork , NeuralNetwork, UnknownArchitecture, FeedForwardLoss import AbstractNeuralNetworks: Chain, GridCell import AbstractNeuralNetworks: Dense, Linear, Recurrent diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index fd220400..f7edb304 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -3,19 +3,24 @@ See [`SymbolicPotentialEnergy`](@ref) and [`SymbolicKineticEnergy`](@ref). 
""" -struct SymbolicEnergy{AT <: Activation, PT <: OptionalParameters, Kinetic} +struct SymbolicEnergy{AT <: Activation, PT, Kinetic} dim::Int width::Int nhidden::Int + parameter_length::Int + parameter_convert::PT activation::AT - parameters::PT - function SymbolicEnergy{Kinetic}(dim, width=dim÷2, nhidden=HNN_nhidden_default, activation=HNN_activation_default; parameters::PT=NullParameters()) where {Kinetic, PT} + function SymbolicEnergy{Kinetic}(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters()) where {Kinetic} @assert iseven(dim) "The input dimension must be an even integer!" - new{typeof(activation), PT, Kinetic}(dim ÷ 2, width, nhidden, activation, parameters) + flattened_parameters = flatten(parameters) + parameter_length = length(flattened_parameters[2]) + new{typeof(activation), typeof(flattened_parameters[2]), Kinetic}(dim, width, nhidden, parameter_length, flattened_parameters[2], activation) end end +ParameterHandling.flatten(::NullParameters) = flatten(NamedTuple()) + """ SymbolicPotentialEnergy @@ -27,7 +32,7 @@ SymbolicPotentialEnergy(dim) # Parameter Dependence """ -const SymbolicPotentialEnergy{AT} = SymbolicEnergy{AT, :potential} +const SymbolicPotentialEnergy{AT, PT} = SymbolicEnergy{AT, PT, :potential} """ SymbolicKineticEnergy @@ -38,13 +43,73 @@ const SymbolicPotentialEnergy{AT} = SymbolicEnergy{AT, :potential} ``` """ -const SymbolicKineticEnergy{AT} = SymbolicEnergy{AT, :kinetic} +const SymbolicKineticEnergy{AT, PT} = SymbolicEnergy{AT, PT, :kinetic} + +function Chain(se::SymbolicEnergy) + inner_layers = Tuple( + [Dense(se.width, se.width, se.activation) for _ in 1:se.nhidden] + ) + + Chain( + Dense(se.dim÷2 + se.parameter_length, se.width, se.activation), + inner_layers..., + Linear(se.width, 1; use_bias = false) + ) +end + +function SymbolicNeuralNetworks.Jacobian(f::EqT, nn::AbstractSymbolicNeuralNetwork, dim2::Integer) + # make differential + Dx = symbolic_differentials(nn.input)[1:dim2] + + # Evaluation of gradient + s∇f = hcat([expand_derivatives.(Symbolics.scalarize(dx(f))) for dx in Dx]...) 
+ + Jacobian(f, s∇f, nn) +end + +function SymbolicNeuralNetworks.Jacobian(nn::AbstractSymbolicNeuralNetwork, dim2::Integer) + + # Evaluation of the symbolic output + soutput = nn.model(nn.input, params(nn)) + + Jacobian(soutput, nn, dim2) +end + +function build_gradient(se::SymbolicEnergy) + model = Chain(se) + nn = SymbolicNeuralNetwork(model) + □ = SymbolicNeuralNetworks.Jacobian(nn, se.dim÷2) + SymbolicNeuralNetworks.build_nn_function(□, nn.params, nn.input) +end + +struct SymplecticEuler{M, N, FT<:Callable, AT<:Architecture, type} <: AbstractExplicitLayer{M, N} + gradient_function::FT + energy_architecture::AT +end + +function initialparameters(integrator::SymplecticEuler) + +end + +const SymplecticEulerA{M, N, FT, AT} = SymplecticEuler{M, N, FT, AT, :A} +const SymplecticEulerB{M, N, FT, AT} = SymplecticEuler{M, N, FT, AT, :B} + +function SymplecticEulerA(se::SymbolicKineticEnergy) + gradient_function = build_gradient(se) + SymplecticEuler{build_gradient(se), :A} +end + +function SymplecticEulerB(se::SymbolicPotentialEnergy) + gradient_function = build_gradient(se) + SymplecticEuler{build_gradient(se), :B} +end + +(integrator::SymplecticEulerA)(qp::QPT, params::NeuralNetworkParameters) = (qp.q + integrator.gradient_function(qp.p, params), qp.p) +(integrator::SymplecticEulerB)(qp::QPT, params::NeuralNetworkParameters) = (qp.q, qp.p - integrator.gradient_function(qp.q, params)) SymbolicPotentialEnergy(args...; kwargs...) = SymbolicEnergy{:potential}(args...; kwargs...) SymbolicKineticEnergy(args...; kwargs...) = KineticEnergy{:kinetic}(args...; kwargs...) -GHNN_integrator_default = nothing - """ GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture @@ -58,18 +123,34 @@ The constructor takes the following input arguments: 1. `dim`: system dimension, 2. `width = dim`: width of the hidden layer. By default this is equal to `dim`, 3. `nhidden = $(HNN_nhidden_default)`: the number of hidden layers, -4. `activation = $(HNN_activation_default)`: the activation function used in the GHNN, -5. `integrator = $(GHNN_integrator_default)`: the integrator that is used to design the GHNN. +4. `n_integrators`: the number of integrators used in the GHNN. +5. 
`activation = $(HNN_activation_default)`: the activation function used in the GHNN, """ struct GeneralizedHamiltonianArchitecture{AT, IT} <: HamiltonianArchitecture{AT} dim::Int width::Int nhidden::Int + n_integrators::Int activation::AT - integrator::IT - function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default, integrator=GHNN_integrator_default) - error("GHNN still has to be implemented!") - new{typeof(activation), typeof(integrator)}(dim, width, nhidden, activation, integrator) + function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default) + new{typeof(activation)}(dim, width, nhidden, n_integrators, activation) end +end + +function Chain(ghnn_arch::GeneralizedHamiltonianArchitecture) + c = () + kinetic_energy = SymbolicKineticEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.parameter_length, ghnn_arch.parameter_convert, ghnn_arch.activation) + potential_energy = SymbolicPotentialEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.parameter_length, ghnn_arch.parameter_convert, ghnn_arch.activation) + + for _ in 1:n_integrators + c = (c..., SymplecticEulerA(kinetic_energy)) + c = (c..., SymplecticEulerB(potential_energy)) + end + + c +end + +function HNNLoss(arch::GeneralizedHamiltonianArchitecture) + end \ No newline at end of file diff --git a/src/architectures/standard_hamiltonian_neural_network.jl b/src/architectures/standard_hamiltonian_neural_network.jl index 5d70304d..920cee8b 100644 --- a/src/architectures/standard_hamiltonian_neural_network.jl +++ b/src/architectures/standard_hamiltonian_neural_network.jl @@ -58,13 +58,13 @@ Compute an executable expression of the Hamiltonian vector field of a [`Hamilton This first computes a symbolic expression of the vector field using [`symbolic_hamiltonian_vector_field`](@ref). """ -function hamiltonian_vector_field(arch::HamiltonianArchitecture) +function hamiltonian_vector_field(arch::StandardHamiltonianArchitecture) nn = SymbolicNeuralNetwork(arch) hvf = symbolic_hamiltonian_vector_field(nn) SymbolicNeuralNetworks.build_nn_function(hvf, nn.params, nn.input) end -function Chain(arch::HamiltonianArchitecture) +function Chain(arch::StandardHamiltonianArchitecture) inner_layers = Tuple( [Dense(arch.width, arch.width, arch.activation) for _ in 1:arch.nhidden] ) diff --git a/test/generalized_hamiltonian_neural_networks_test.jl b/test/generalized_hamiltonian_neural_networks_test.jl new file mode 100644 index 00000000..5cf48d12 --- /dev/null +++ b/test/generalized_hamiltonian_neural_networks_test.jl @@ -0,0 +1,10 @@ +using GeometricMachineLearning +using GeometricProblems.HarmonicOscillator: odeproblem, default_parameters +using GeometricIntegrators + +sol = integrate(odeproblem(), ImplicitMidpoint()) +dl = DataLoader(sol) + +arch = ghnn(2) +nn = NeuralNetwork(arch) + From fab98623a3b6141693f11ab5ad78f05323543034 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 14 Jul 2025 14:12:01 +0200 Subject: [PATCH 03/50] Changed name to remove conflicts with GHNNs. 
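
Patch 02 introduced layer structs named `SymplecticEuler`, `SymplecticEulerA` and `SymplecticEulerB` in `generalized_hamiltonian_neural_network.jl`, which collide with the training-method types of the same names in `src/training_method/symplectic_euler.jl`; the training-method hierarchy is therefore renamed to `SymplecticEulerIntegrator*`. A minimal, self-contained sketch of the collision (simplified signatures and stand-in supertypes, not part of this patch):

```julia
module ClashSketch

# stand-ins for the two supertypes involved
abstract type HnnTrainingMethod end
abstract type AbstractExplicitLayer{M, N} end

# new GHNN layer introduced in patch 02 (signature simplified here)
struct SymplecticEuler{M, N} <: AbstractExplicitLayer{M, N} end

# the old training-method type of the same name can no longer coexist with it:
# abstract type SymplecticEuler <: HnnTrainingMethod end   # ERROR: invalid redefinition of constant

# so the training-method hierarchy moves to a distinct name
abstract type SymplecticEulerIntegrator <: HnnTrainingMethod end
struct SymplecticEulerIntegratorA <: SymplecticEulerIntegrator end
struct SymplecticEulerIntegratorB <: SymplecticEulerIntegrator end

end # module
```
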
--- src/training_method/symplectic_euler.jl | 20 ++++++++++---------- test/train!/test_method.jl | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/training_method/symplectic_euler.jl b/src/training_method/symplectic_euler.jl index fab5de86..ea06c5b3 100644 --- a/src/training_method/symplectic_euler.jl +++ b/src/training_method/symplectic_euler.jl @@ -1,27 +1,27 @@ -abstract type SymplecticEuler <: HnnTrainingMethod end +abstract type SymplecticEulerIntegrator <: HnnTrainingMethod end -struct SymplecticEulerA <: SymplecticEuler end -struct SymplecticEulerB <: SymplecticEuler end +struct SymplecticEulerIntegratorA <: SymplecticEulerIntegrator end +struct SymplecticEulerIntegratorB <: SymplecticEulerIntegrator end SEuler(;sqdist = sqeuclidean) = SEulerA(sqdist = sqdist) -SEulerA(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerA, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) -SEulerB(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerB, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) +SEulerA(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerIntegratorA, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) +SEulerB(;sqdist = sqeuclidean) = TrainingMethod{SymplecticEulerIntegratorB, PhaseSpaceSymbol, TrajectoryData, typeof(sqdist)}(sqdist) -function loss_single(::TrainingMethod{SymplecticEulerA}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) +function loss_single(::TrainingMethod{SymplecticEulerIntegratorA}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) dH = vectorfield(nn, [qₙ₊₁...,pₙ...], params) sqeuclidean(dH[1],(qₙ₊₁-qₙ)/Δt) + sqeuclidean(dH[2],(pₙ₊₁-pₙ)/Δt) end -function loss_single(::TrainingMethod{SymplecticEulerB}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) +function loss_single(::TrainingMethod{SymplecticEulerIntegratorB}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, qₙ, qₙ₊₁, pₙ, pₙ₊₁, Δt, params = params(nn)) dH = vectorfield(nn, [qₙ...,pₙ₊₁...], params) sqeuclidean(dH[1],(qₙ₊₁-qₙ)/Δt) + sqeuclidean(dH[2],(pₙ₊₁-pₙ)/Δt) end -get_loss(::TrainingMethod{<:SymplecticEuler}, ::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, args) = (get_data(data,:q, args...), get_data(data,:q, next(args...)...), get_data(data,:p, args...), get_data(data,:p,next(args...)...), get_Δt(data)) +get_loss(::TrainingMethod{<:SymplecticEulerIntegrator}, ::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, args) = (get_data(data,:q, args...), get_data(data,:q, next(args...)...), get_data(data,:p, args...), get_data(data,:p,next(args...)...), get_Δt(data)) -loss(ti::TrainingMethod{<:SymplecticEuler}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, index_batch = eachindex(ti, data), params = params(nn)) = +loss(ti::TrainingMethod{<:SymplecticEulerIntegrator}, nn::AbstractNeuralNetwork{<:HamiltonianArchitecture}, data::TrainingData{<:DataSymbol{<:PhaseSpaceSymbol}}, index_batch = eachindex(ti, data), params = params(nn)) = mapreduce(args->loss_single(Zygote.ignore_derivatives(ti), nn, get_loss(ti, nn, data, args)..., params),+, index_batch) -min_length_batch(::SymplecticEuler) = 2 \ No newline at end of file +min_length_batch(::SymplecticEulerIntegrator) = 2 \ No newline at end of file diff --git 
a/test/train!/test_method.jl b/test/train!/test_method.jl index fbce31e8..4df65d03 100644 --- a/test/train!/test_method.jl +++ b/test/train!/test_method.jl @@ -24,7 +24,7 @@ exacthnn = ExactHnn() sympeuler = SEuler() -@test GeometricMachineLearning.type(sympeuler) == SymplecticEulerA +@test GeometricMachineLearning.type(sympeuler) == SymplecticEulerIntegratorA @test symbols(sympeuler) == PhaseSpaceSymbol @test shape(sympeuler) == TrajectoryData @test min_length_batch(sympeuler) == 2 @@ -64,7 +64,7 @@ midpointlnn = VariaMidPoint() ######################################### @testerror GeometricMachineLearning.type(default_Method(sympnet, tra_pos_data)) -@test GeometricMachineLearning.type(default_method(hnn, tra_ps_data)) == SymplecticEulerA +@test GeometricMachineLearning.type(default_method(hnn, tra_ps_data)) == SymplecticEulerIntegratorA @test GeometricMachineLearning.type(default_method(hnn, sam_dps_data)) == HnnExactMethod @test GeometricMachineLearning.type(default_method(sympnet, tra_ps_data)) == BasicSympNetMethod @test GeometricMachineLearning.type(default_method(lnn, tra_pos_data)) == VariationalMidPointMethod From fd6cf94bb9d5856935b52e74ce7cf8a2f16691bd Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 15 Jul 2025 10:54:57 +0200 Subject: [PATCH 04/50] Made sure the case where QPT consists of vectors of different types also works. --- src/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils.jl b/src/utils.jl index 0fe9b3b5..c6b37128 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -151,7 +151,7 @@ qp = (q = [1, 2], p = [3, 4]) ``` """ -const QPT{T} = NamedTuple{(:q, :p), Tuple{AT, AT}} where {T, AT <: AbstractArray{T}} +const QPT{T} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: AbstractArray{T, N}} @doc raw""" QPTOAT From 845f20cd1e8902f162d0a82ca683196c1c0bbc5a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 15 Jul 2025 15:40:02 +0200 Subject: [PATCH 05/50] Finished implementation of generalized hamiltonian neural networks (without data loader). --- .../hamiltonian_neural_network.md | 6 +- src/GeometricMachineLearning.jl | 6 +- .../generalized_hamiltonian_neural_network.jl | 180 ++++++++++++++---- src/utils.jl | 6 +- ...alized_hamiltonian_neural_networks_test.jl | 19 +- test/runtests.jl | 1 + 6 files changed, 170 insertions(+), 48 deletions(-) diff --git a/docs/src/architectures/hamiltonian_neural_network.md b/docs/src/architectures/hamiltonian_neural_network.md index 1b8c68b6..12380ef0 100644 --- a/docs/src/architectures/hamiltonian_neural_network.md +++ b/docs/src/architectures/hamiltonian_neural_network.md @@ -42,12 +42,16 @@ Here the derivatives (i.e. 
vector field data) ``\dot{q}_i^{(t)}`` and ``\dot{p}_ ## Library Functions ```@docs -GeometricMachineLearning.hamiltonian_vector_field(::HamiltonianArchitecture) +GeometricMachineLearning.hamiltonian_vector_field(::StandardHamiltonianArchitecture) GeometricMachineLearning.HamiltonianArchitecture GeometricMachineLearning.StandardHamiltonianArchitecture GeometricMachineLearning.HNNLoss GeometricMachineLearning.symbolic_hamiltonian_vector_field(::GeometricMachineLearning.SymbolicNeuralNetwork) GeometricMachineLearning.SymbolicPullback(::HamiltonianArchitecture) +GeometricMachineLearning.SymbolicEnergy +GeometricMachineLearning.SymbolicPotentialEnergy +GeometricMachineLearning.SymbolicKineticEnergy +GeometricMachineLearning.build_gradient GeometricMachineLearning.GeneralizedHamiltonianArchitecture GeometricMachineLearning._processing ``` \ No newline at end of file diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 55d149fc..3405abcc 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -19,10 +19,10 @@ module GeometricMachineLearning import LazyArrays import SymbolicNeuralNetworks import SymbolicNeuralNetworks: input_dimension, output_dimension, SymbolicPullback - using SymbolicNeuralNetworks: derivative, _get_contents, _get_params, SymbolicNeuralNetwork + using SymbolicNeuralNetworks: derivative, _get_contents, _get_params, SymbolicNeuralNetwork, AbstractSymbolicNeuralNetwork using Symbolics: @variables, substitute - import ParameterHandling: flatten + import ParameterHandling import AbstractNeuralNetworks: Architecture, Model, AbstractExplicitLayer, AbstractExplicitCell, AbstractNeuralNetwork , NeuralNetwork, UnknownArchitecture, FeedForwardLoss import AbstractNeuralNetworks: Chain, GridCell @@ -362,8 +362,6 @@ module GeometricMachineLearning include("training/train.jl") - export SymplecticEuler - export SymplecticEulerA, SymplecticEulerB export SEuler, SEulerA, SEulerB include("training_method/symplectic_euler.jl") diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index f7edb304..cfa829ad 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -11,25 +11,33 @@ struct SymbolicEnergy{AT <: Activation, PT, Kinetic} parameter_convert::PT activation::AT - function SymbolicEnergy{Kinetic}(dim, width=dim, nhidden=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters()) where {Kinetic} + function SymbolicEnergy(dim, width=dim, nhidden=HNN_nhidden_default, activation::Activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), type) @assert iseven(dim) "The input dimension must be an even integer!" 
- flattened_parameters = flatten(parameters) - parameter_length = length(flattened_parameters[2]) - new{typeof(activation), typeof(flattened_parameters[2]), Kinetic}(dim, width, nhidden, parameter_length, flattened_parameters[2], activation) + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + new{typeof(activation), typeof(flattened_parameters[2]), type}(dim, width, nhidden, parameter_length, flattened_parameters[2], activation) end end -ParameterHandling.flatten(::NullParameters) = flatten(NamedTuple()) +ParameterHandling.flatten(::NullParameters) = ParameterHandling.flatten(NamedTuple()) """ SymbolicPotentialEnergy +A `const` derived from [`SymbolicEnergy`](@ref). + # Constructors -```julia -SymbolicPotentialEnergy(dim) +```jldoctest; setup=:(using GeometricMachineLearning) +julia> params, dim = (m = 1., ω = π / 2), 2 +((m = 1.0, ω = 1.5707963267948966), 2) + +julia> se = GeometricMachineLearning.SymbolicPotentialEnergy(dim; parameters = params); + ``` +In practice we use `SymbolicPotentialEnergy` (and [`SymbolicKineticEnergy`](@ref)) together with [`build_gradient(::SymbolicEnergy)`](@ref). + # Parameter Dependence """ const SymbolicPotentialEnergy{AT, PT} = SymbolicEnergy{AT, PT, :potential} @@ -37,14 +45,17 @@ const SymbolicPotentialEnergy{AT, PT} = SymbolicEnergy{AT, PT, :potential} """ SymbolicKineticEnergy -# Constructors +A `const` derived from [`SymbolicEnergy`](@ref). -```julia +# Constructors -``` +See [`SymbolicPotentialEnergy`](@ref). """ const SymbolicKineticEnergy{AT, PT} = SymbolicEnergy{AT, PT, :kinetic} +SymbolicPotentialEnergy(args...; kwargs...) = SymbolicEnergy(args...; type = :potential, kwargs...) +SymbolicKineticEnergy(args...; kwargs...) = SymbolicEnergy(args...; type = :kinetic, kwargs...) + function Chain(se::SymbolicEnergy) inner_layers = Tuple( [Dense(se.width, se.width, se.activation) for _ in 1:se.nhidden] @@ -57,14 +68,14 @@ function Chain(se::SymbolicEnergy) ) end -function SymbolicNeuralNetworks.Jacobian(f::EqT, nn::AbstractSymbolicNeuralNetwork, dim2::Integer) +function SymbolicNeuralNetworks.Jacobian(f::SymbolicNeuralNetworks.EqT, nn::AbstractSymbolicNeuralNetwork, dim2::Integer) # make differential - Dx = symbolic_differentials(nn.input)[1:dim2] + Dx = SymbolicNeuralNetworks.symbolic_differentials(nn.input)[1:dim2] # Evaluation of gradient - s∇f = hcat([expand_derivatives.(Symbolics.scalarize(dx(f))) for dx in Dx]...) + s∇f = hcat([SymbolicNeuralNetworks.expand_derivatives.(SymbolicNeuralNetworks.Symbolics.scalarize(dx(f))) for dx in Dx]...) - Jacobian(f, s∇f, nn) + SymbolicNeuralNetworks.Jacobian(f, s∇f, nn) end function SymbolicNeuralNetworks.Jacobian(nn::AbstractSymbolicNeuralNetwork, dim2::Integer) @@ -72,43 +83,119 @@ function SymbolicNeuralNetworks.Jacobian(nn::AbstractSymbolicNeuralNetwork, dim2 # Evaluation of the symbolic output soutput = nn.model(nn.input, params(nn)) - Jacobian(soutput, nn, dim2) + SymbolicNeuralNetworks.Jacobian(soutput, nn, dim2) end +""" + build_gradient(se) + +Build a gradient function from a [`SymbolicEnergy`](@ref) `se`. 
+ +# Examples + +```jldoctest; setup=:(using GeometricMachineLearning; using GeometricMachineLearning: SymbolicPotentialEnergy, build_gradient, concatenate_array_with_parameters; using GeometricMachineLearning.GeometricBase: OptionalParameters; using Random; Random.seed!(123)) +params, dim = (m = 1., ω = π / 2), 4 + +se = SymbolicPotentialEnergy(dim; parameters = params) + +network_params = NeuralNetwork(Chain(se)).params + +built_grad = build_gradient(se) +grad(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) = built_grad(concatenate_array_with_parameters(qp, problem_params), params) + +grad(rand(2), params, network_params) + +# output + +2×1 Matrix{Float64}: + 0.069809944230798 + 0.30150270218327446 +``` +""" function build_gradient(se::SymbolicEnergy) model = Chain(se) nn = SymbolicNeuralNetwork(model) □ = SymbolicNeuralNetworks.Jacobian(nn, se.dim÷2) - SymbolicNeuralNetworks.build_nn_function(□, nn.params, nn.input) + SymbolicNeuralNetworks.build_nn_function(SymbolicNeuralNetworks.derivative(□)', nn.params, nn.input) end -struct SymplecticEuler{M, N, FT<:Callable, AT<:Architecture, type} <: AbstractExplicitLayer{M, N} +struct SymplecticEuler{M, N, FT<:Base.Callable, MT<:Chain, type, Last} <: AbstractExplicitLayer{M, N} gradient_function::FT - energy_architecture::AT + energy_model::MT end -function initialparameters(integrator::SymplecticEuler) - +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::SymplecticEuler, backend::KernelAbstractions.Backend, ::Type{T}) where {T} + initialparameters(rng, init_weight, integrator.energy_model, backend, T) end -const SymplecticEulerA{M, N, FT, AT} = SymplecticEuler{M, N, FT, AT, :A} -const SymplecticEulerB{M, N, FT, AT} = SymplecticEuler{M, N, FT, AT, :B} +const SymplecticEulerA{M, N, FT, AT, Last} = SymplecticEuler{M, N, FT, AT, :A} +const SymplecticEulerB{M, N, FT, AT, Last} = SymplecticEuler{M, N, FT, AT, :B} -function SymplecticEulerA(se::SymbolicKineticEnergy) +function SymplecticEulerA(se::SymbolicKineticEnergy; last_step::Bool=false) gradient_function = build_gradient(se) - SymplecticEuler{build_gradient(se), :A} + c = Chain(se) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :A, last_step}(gradient_function, c) end -function SymplecticEulerB(se::SymbolicPotentialEnergy) +function SymplecticEulerB(se::SymbolicPotentialEnergy; last_step::Bool=false) gradient_function = build_gradient(se) - SymplecticEuler{build_gradient(se), :B} + c = Chain(se) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :B, last_step}(gradient_function, c) end -(integrator::SymplecticEulerA)(qp::QPT, params::NeuralNetworkParameters) = (qp.q + integrator.gradient_function(qp.p, params), qp.p) -(integrator::SymplecticEulerB)(qp::QPT, params::NeuralNetworkParameters) = (qp.q, qp.p - integrator.gradient_function(qp.q, params)) +function concatenate_array_with_parameters(qp::AbstractVector, params::OptionalParameters) + vcat(qp, ParameterHandling.flatten(params)[1]) +end -SymbolicPotentialEnergy(args...; kwargs...) = SymbolicEnergy{:potential}(args...; kwargs...) -SymbolicKineticEnergy(args...; kwargs...) = KineticEnergy{:kinetic}(args...; kwargs...) +function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) + hcat((concatenate_array_with_parameters(qp[:, i], params) for i in axes(qp, 2))...) 
+end + +function (integrator::SymplecticEulerA{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + (q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p) +end + +function (integrator::SymplecticEulerB{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + (q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])) +end + +function (integrator::SymplecticEulerA{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + ((q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p), problem_params) +end + +function (integrator::SymplecticEulerB{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + ((q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])), problem_params) +end + +function (integrator::SymplecticEuler)(qp_params::Tuple{<:QPTOAT2, <:OptionalParameters}, params::NeuralNetworkParameters) + integrator(qp_params..., params) +end + +function (integrator::SymplecticEuler)(::TT, ::NeuralNetworkParameters) where {TT <: Tuple} + error("The input is of type $(TT). This shouldn't be the case!") +end + +function (integrator::SymplecticEuler{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params)[1] + (vcat(evaluated.q, evaluated.p), problem_params) +end + +function (integrator::SymplecticEuler{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params) + vcat(evaluated.q, evaluated.p) +end + +(integrator::SymplecticEuler)(qp::QPTOAT2, params::NeuralNetworkParameters) = integrator(qp, NullParameters(), params) """ GeneralizedHamiltonianArchitecture <: HamiltonianArchitecture @@ -126,31 +213,44 @@ The constructor takes the following input arguments: 4. `n_integrators`: the number of integrators used in the GHNN. 5. 
`activation = $(HNN_activation_default)`: the activation function used in the GHNN, """ -struct GeneralizedHamiltonianArchitecture{AT, IT} <: HamiltonianArchitecture{AT} +struct GeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} dim::Int width::Int nhidden::Int n_integrators::Int + parameters::PT activation::AT - function GeneralizedHamiltonianArchitecture(dim, width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default) - new{typeof(activation)}(dim, width, nhidden, n_integrators, activation) + function GeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters()) + new{typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) end end +@generated function AbstractNeuralNetworks.applychain(layers::Tuple, x::Tuple{<:QPTOAT2, <:OptionalParameters}, ps::Tuple) + N = length(fieldtypes((layers))) + x_symbols = vcat([:x], [gensym() for _ in 1:N]) + calls = [:(($(x_symbols[i + 1])) = layers[$i]($(x_symbols[i]), ps[$i])) for i in 1:N] + push!(calls, :(return $(x_symbols[N + 1]))) + return Expr(:block, calls...) +end + function Chain(ghnn_arch::GeneralizedHamiltonianArchitecture) c = () - kinetic_energy = SymbolicKineticEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.parameter_length, ghnn_arch.parameter_convert, ghnn_arch.activation) - potential_energy = SymbolicPotentialEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.parameter_length, ghnn_arch.parameter_convert, ghnn_arch.activation) + kinetic_energy = SymbolicKineticEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) + potential_energy = SymbolicPotentialEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) - for _ in 1:n_integrators - c = (c..., SymplecticEulerA(kinetic_energy)) - c = (c..., SymplecticEulerB(potential_energy)) + for n in 1:ghnn_arch.n_integrators + c = (c..., SymplecticEulerA(kinetic_energy; last_step = false)) + c = n == ghnn_arch.n_integrators ? (c..., SymplecticEulerB(potential_energy; last_step=true)) : (c..., SymplecticEulerB(potential_energy; last_step=false)) end - c + Chain(c...) 
end -function HNNLoss(arch::GeneralizedHamiltonianArchitecture) +function (nn::NeuralNetwork{GT})(qp::QPTOAT2, problem_params::OptionalParameters) where {GT <: GeneralizedHamiltonianArchitecture} + nn.model(qp, problem_params, params(nn)) +end +function (model::Chain)(qp::QPTOAT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) + model((qp, problem_params), params) end \ No newline at end of file diff --git a/src/utils.jl b/src/utils.jl index c6b37128..9cf654ea 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -151,7 +151,9 @@ qp = (q = [1, 2], p = [3, 4]) ``` """ -const QPT{T} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: AbstractArray{T, N}} +const QPT{T} = NamedTuple{(:q, :p), Tuple{AT, AT}} where {T, N, AT <: AbstractArray{T, N}} + +const QPT2{T} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: AbstractArray{T, N}} @doc raw""" QPTOAT @@ -165,6 +167,8 @@ This could be data in ``(q, p)\in\mathbb{R}^{2d}`` form or come from an arbitrar """ const QPTOAT{T} = Union{QPT{T}, AbstractArray{T}} where T +const QPTOAT2{T} = Union{QPT2{T}, AbstractArray{T}} where T + Base.:≈(qp₁::QPT, qp₂::QPT) = (qp₁.q ≈ qp₂.q) & (qp₁.p ≈ qp₂.p) _eltype(x) = eltype(x) diff --git a/test/generalized_hamiltonian_neural_networks_test.jl b/test/generalized_hamiltonian_neural_networks_test.jl index 5cf48d12..cdd4030d 100644 --- a/test/generalized_hamiltonian_neural_networks_test.jl +++ b/test/generalized_hamiltonian_neural_networks_test.jl @@ -1,10 +1,25 @@ using GeometricMachineLearning +using GeometricMachineLearning: OptionalParameters, OneInitializer using GeometricProblems.HarmonicOscillator: odeproblem, default_parameters using GeometricIntegrators +using Test sol = integrate(odeproblem(), ImplicitMidpoint()) +dim = length(sol.problem.ics.q) + dl = DataLoader(sol) -arch = ghnn(2) -nn = NeuralNetwork(arch) +function test_ghnn_without_parameters(dim::Integer = dim) + arch = GeneralizedHamiltonianArchitecture(dim) + nn = NeuralNetwork(arch; initializer=OneInitializer()) + @test nn([1., 1.]) ≈ [1.003217200759985, 0.9968055760434815] +end + +function test_ghnn_with_parameters(dim::Integer = dim, parameters::OptionalParameters = default_parameters) + arch = GeneralizedHamiltonianArchitecture(dim, parameters = parameters) + nn = NeuralNetwork(arch; initializer=OneInitializer()) + @test nn([1., 1.], parameters) ≈ [1.0000350420844089, 0.9999649603746436] +end +test_ghnn_without_parameters() +test_ghnn_with_parameters() \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7f0bd48f..d52f9dda 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,6 +27,7 @@ using Documenter: doctest @safetestset "Gradient Layer " begin include("layers/gradient_layer_tests.jl") end @safetestset "Test symplecticity of upscaling layer " begin include("layers/sympnet_layers_test.jl") end @safetestset "Hamiltonian Neural Network " begin include("hamiltonian_neural_network_tests.jl") end +@safetestset "Generalized Hamiltonian Neural Network " begin include("generalized_hamiltonian_neural_networks_test.jl") end @safetestset "Manifold Neural Network Layers " begin include("layers/manifold_layers.jl") end @safetestset "Custom tensor matrix multiplication " begin include("kernels/tensor_mat_mul.jl") end From 760ba7ea2a1e95fdb470cff790a989a80ccd8c32 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 25 Jul 2025 13:27:57 +0200 Subject: [PATCH 06/50] Added forcing/dissipation layers. 
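
The new `ForcingLayer{M, N}` abstract type only fixes the interface for layers that learn dissipative (non-conservative) terms; concrete subtypes are left for later work. A hypothetical sketch of what such a subtype could look like (stand-in types and names, purely illustrative, not part of this patch):

```julia
# stand-in for AbstractNeuralNetworks.AbstractExplicitLayer
abstract type AbstractExplicitLayer{M, N} end

# mirrors the abstract type added by this patch
abstract type ForcingLayer{M, N} <: AbstractExplicitLayer{M, N} end

# hypothetical concrete subtype: adds a learned linear dissipation term to the momenta
struct LinearDissipation{M} <: ForcingLayer{M, M} end

function (::LinearDissipation)(qp::NamedTuple{(:q, :p)}, params::NamedTuple)
    # params.D plays the role of a learned (positive semi-definite) dissipation matrix
    (q = qp.q, p = qp.p - params.D * qp.p)
end

layer = LinearDissipation{2}()
layer((q = [1.0, 0.0], p = [0.0, 1.0]), (D = 0.1 * [1.0 0.0; 0.0 1.0],))  # p is damped, q untouched
```
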
--- src/GeometricMachineLearning.jl | 1 + src/layers/forcing_dissipation_layers.jl | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 src/layers/forcing_dissipation_layers.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 3405abcc..a1d4d67c 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -153,6 +153,7 @@ module GeometricMachineLearning include("optimizers/manifold_related/retractions.jl") include("layers/sympnets.jl") + include("layers/forcing_dissipation_layers.jl") include("layers/bias_layer.jl") include("layers/resnet.jl") include("layers/manifold_layer.jl") diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl new file mode 100644 index 00000000..c2d31928 --- /dev/null +++ b/src/layers/forcing_dissipation_layers.jl @@ -0,0 +1,6 @@ +@doc raw""" + ForcingLayer <: AbstractExplicitLayer + +An abstract type that summarizes layers that can learn dissipative terms, but not conservative ones. +""" +abstract type ForcingLayer{M, N} <: AbstractExplicitLayer{M, N} end \ No newline at end of file From ffaa11c0b69835639301ea83e8d43301a5495d0e Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 29 Jul 2025 21:30:02 +0200 Subject: [PATCH 07/50] Finished implementing the parametric data loader including a test. --- src/GeometricMachineLearning.jl | 5 +- src/data_loader/batch.jl | 7 +- src/data_loader/data_loader.jl | 2 +- src/data_loader/optimize.jl | 4 +- src/data_loader/parametric_data_loader.jl | 140 ++++++++++++++++++ src/loss/losses.jl | 6 + .../parametric_data_loader_test.jl | 35 +++++ test/runtests.jl | 1 + test/training_phnn.jl | 30 ++++ 9 files changed, 223 insertions(+), 7 deletions(-) create mode 100644 src/data_loader/parametric_data_loader.jl create mode 100644 test/data_loader/parametric_data_loader_test.jl create mode 100644 test/training_phnn.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index a1d4d67c..a318a1db 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -304,7 +304,7 @@ module GeometricMachineLearning include("pullbacks/zygote_pullback.jl") include("pullbacks/symbolic_hnn_pullback.jl") - export DataLoader, onehotbatch + export DataLoader, ParametricDataLoader, onehotbatch export Batch, optimize_for_one_epoch! 
include("data_loader/tensor_assign.jl") include("data_loader/matrix_assign.jl") @@ -312,6 +312,9 @@ module GeometricMachineLearning include("data_loader/batch.jl") include("data_loader/optimize.jl") + include("data_loader/parametric_data_loader.jl") + + export default_optimizer include("optimizers/default_optimizer.jl") diff --git a/src/data_loader/batch.jl b/src/data_loader/batch.jl index a575ab31..d41ba001 100644 --- a/src/data_loader/batch.jl +++ b/src/data_loader/batch.jl @@ -140,18 +140,19 @@ function number_of_batches(dl::DataLoader{T, AT, OT, :RegularData}, batch::Batch Int(ceil(dl.input_time_steps * dl.n_params / batch.batch_size)) end -function batch_over_two_axes(batch::Batch, number_columns::Int, third_dim::Int, dl::DataLoader) +function batch_over_two_axes(batch::Batch, number_columns::Integer, third_dim::Integer, n_batches::Integer) time_indices = shuffle(1:number_columns) parameter_indices = shuffle(1:third_dim) complete_indices = Iterators.product(time_indices, parameter_indices) |> collect |> vec batches = () - n_batches = number_of_batches(dl, batch) for batch_number in 1:(n_batches - 1) batches = (batches..., complete_indices[(batch_number - 1) * batch.batch_size + 1 : batch_number * batch.batch_size]) end (batches..., complete_indices[(n_batches - 1) * batch.batch_size + 1:end]) end +batch_over_two_axes(batch::Batch, number_of_columns::Integer, third_dim::Integer, dl::DataLoader) = batch_over_two_axes(batch, number_of_columns, third_dim, number_of_batches(dl, batch)) + function (batch::Batch)(dl::DataLoader{T, BT, OT, :RegularData}) where {T, AT<:AbstractArray{T, 3}, BT<:Union{AT, NamedTuple{(:q, :p), Tuple{AT, AT}}}, OT} batch_over_two_axes(batch, dl.input_time_steps, dl.n_params, dl) end @@ -186,7 +187,7 @@ end output[i, j, k] = data[i, indices[1, k] + seq_length + j - 1, indices[2, k]] end -# this is neeced if we want to use the vector of tuples in a kernel +# this is needed if we want to use the vector of tuples in a kernel function convert_vector_of_tuples_to_matrix(backend::Backend, batch_indices_tuple::Vector{Tuple{Int, Int}}) _batch_size = length(batch_indices_tuple) diff --git a/src/data_loader/data_loader.jl b/src/data_loader/data_loader.jl index 70e39ea7..2d012505 100644 --- a/src/data_loader/data_loader.jl +++ b/src/data_loader/data_loader.jl @@ -145,7 +145,7 @@ function DataLoader(data::AbstractMatrix{T}; autoencoder=true, suppress_info = f input_dim, time_steps = size(data) reshaped_data = reshape(data, input_dim, time_steps, 1) return DataLoader{T, typeof(reshaped_data), Nothing, :TimeSeries}(reshaped_data, nothing, input_dim, time_steps, 1, nothing, nothing) - elseif autoencoder == true + elseif autoencoder == true input_dim, n_params = size(data) reshaped_data = reshape(data, input_dim, 1, n_params) return DataLoader{T, typeof(reshaped_data), Nothing, :RegularData}(reshaped_data, nothing, input_dim, 1, n_params, nothing, nothing) diff --git a/src/data_loader/optimize.jl b/src/data_loader/optimize.jl index 4e41b228..5a9cb007 100644 --- a/src/data_loader/optimize.jl +++ b/src/data_loader/optimize.jl @@ -86,8 +86,8 @@ _copy(qp::QPT) = (q = copy(qp.q), p = copy(qp.p)) _copy(t::Tuple{<:QPTOAT, <:QPTOAT}) = _copy.(t) function (o::Optimizer)(nn::NeuralNetwork, - dl::DataLoader, - batch::Batch, + dl, + batch::Batch, n_epochs::Integer, loss::NetworkLoss, _pullback::AbstractPullback = ZygotePullback(loss); show_progress = true) diff --git a/src/data_loader/parametric_data_loader.jl b/src/data_loader/parametric_data_loader.jl new file mode 100644 index 
00000000..cb49092e --- /dev/null +++ b/src/data_loader/parametric_data_loader.jl @@ -0,0 +1,140 @@ +""" + ParametricDataLoader + +Very similar to [`DataLoader`](@ref), but can deal with parametric problems. +""" +struct ParametricDataLoader{T, AT<:QPTOAT2, VT<:AbstractVector} + input::AT + input_dim::Int + input_time_steps::Int + parameters::VT + n_params::Int + + function ParametricDataLoader(data::QPTOAT2{T, 3}, parameters::AbstractVector) where {T} + input_dim, input_time_steps, n_params = _size(data) + @assert T == _eltype(parameters) "Provided data and parameters must have the same eltype!" + @assert length(parameters) == _size(data, 3) "The number of provided parameters and the parameter axis of the supplied data do not have the same length!" + + new{T, typeof(data), typeof(parameters)}(data, input_dim, input_time_steps, parameters, n_params) + end +end + +function ParametricDataLoader(input::AbstractMatrix{T}, parameters::AbstractVector) where {T} + ParametricDataLoader(reshape(input, size(input)..., 1), parameters) +end + +function ParametricDataLoader(ensemble_solution::EnsembleSolution{T, T1, Vector{ST}}) where {T, + T1, + DT <: DataSeries{T}, + ST <: GeometricSolution{T, T1, NamedTuple{(:q, :p), Tuple{DT, DT}}} + } + + sys_dim = length(ensemble_solution.s[1].q[0]) + input_time_steps = length(ensemble_solution.t) + n_params = length(ensemble_solution.s) + params = ensemble_solution.problem.parameters + + data = (q = zeros(T, sys_dim, input_time_steps, n_params), p = zeros(T, sys_dim, input_time_steps, n_params)) + + for (solution, i) in zip(ensemble_solution.s, axes(ensemble_solution.s, 1)) + for dim in 1:sys_dim + data.q[dim, :, i] = solution.q[:, dim] + data.p[dim, :, i] = solution.p[:, dim] + end + end + + ParametricDataLoader(data, params) +end + +# """ +# rearrange_parameters(parameters) +# +# Rearrange `parameters` such that they can be used by [`ParametricDataLoader`](@ref). +# """ +# function rearrange_parameters(parameters::Vector{<:NamedTuple}) +# parameters_rearranged = zeros(_eltype(parameters), ) +# end + +# function batch_over_two_axes(batch::Batch, number_columns::Int, third_dim::Int, dl::ParametricDataLoader) +# time_indices = shuffle(1:number_columns) +# parameter_indices = shuffle(1:third_dim) +# complete_indices = Iterators.product(time_indices, parameter_indices) |> collect |> vec +# batches = () +# n_batches = number_of_batches(dl, batch) +# for batch_number in 1:(n_batches - 1) +# batches = (batches..., complete_indices[(batch_number - 1) * batch.batch_size + 1 : batch_number * batch.batch_size]) +# end +# (batches..., complete_indices[(n_batches - 1) * batch.batch_size + 1:end]) +# end + +function optimize_for_one_epoch!( opt::Optimizer, + model, + ps::Union{NeuralNetworkParameters, NamedTuple}, + dl::ParametricDataLoader{T}, + batch::Batch, + _pullback::AbstractPullback, + λY) where T + count = 0 + total_error = T(0) + batches = batch(dl) + for batch_indices in batches + count += 1 + # these `copy`s should not be necessary! coming from a Zygote problem! 
+ _input_nt_output_nt_parameter_indices = convert_input_and_batch_indices_to_array(dl, batch, batch_indices) + input_nt_output_nt = _input_nt_output_nt_parameter_indices[1:2] + loss_value, pullback = _pullback(ps, model, _input_nt_output_nt_parameter_indices) + total_error += loss_value + dp = _get_params(_unpack_tuple(pullback(one(loss_value)))) + optimization_step!(opt, λY, ps, dp) + end + total_error / count +end + +function parameter_indices(parameters::AbstractVector, parameter_indices::AbstractVector{Int}) + [parameters[parameter_index] for parameter_index in parameter_indices] +end + +function parameter_indices(parameters::AbstractVector, batch_indices::AbstractMatrix{Int}) + parameter_indices(parameters, batch_indices[2, :]) +end + +function parameter_indices(dl::ParametricDataLoader, indices::AbstractArray{Int}) + parameter_indices(dl.parameters, indices) +end + +function convert_input_and_batch_indices_to_array(dl::ParametricDataLoader{T, BT}, batch::Batch, batch_indices_tuple::Vector{Tuple{Int, Int}}) where {T, AT<:AbstractArray{T, 3}, BT<:NamedTuple{(:q, :p), Tuple{AT, AT}}} + backend = networkbackend(dl.input.q) + + # the batch size is smaller for the last batch + _batch_size = length(batch_indices_tuple) + + batch_indices = convert_vector_of_tuples_to_matrix(backend, batch_indices_tuple) + + q_input = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.seq_length, _batch_size) + p_input = similar(q_input) + + assign_input_from_vector_of_tuples! = assign_input_from_vector_of_tuples_kernel!(backend) + assign_input_from_vector_of_tuples!(q_input, p_input, dl.input, batch_indices, ndrange=(dl.input_dim ÷ 2, batch.seq_length, _batch_size)) + + q_output = KernelAbstractions.allocate(backend, T, dl.input_dim ÷ 2, batch.prediction_window, _batch_size) + p_output = similar(q_output) + + assign_output_from_vector_of_tuples! = assign_output_from_vector_of_tuples_kernel!(backend) + assign_output_from_vector_of_tuples!(q_output, p_output, dl.input, batch_indices, batch.seq_length, ndrange=(dl.input_dim ÷ 2, batch.prediction_window, _batch_size)) + + (q = q_input, p = p_input), (q = q_output, p = p_output), parameter_indices(dl, batch_indices) +end + +function number_of_batches(dl::ParametricDataLoader, batch::Batch) + @assert dl.input_time_steps ≥ (batch.seq_length + batch.prediction_window) "The number of time steps has to be greater than sequence length + prediction window." + Int(ceil((dl.input_time_steps - (batch.seq_length - 1) - batch.prediction_window) * dl.n_params / batch.batch_size)) +end + +function (batch::Batch)(dl::ParametricDataLoader) + batch_over_two_axes(batch, dl.input_time_steps - (batch.seq_length - 1) - batch.prediction_window, dl.n_params, number_of_batches(dl, batch)) +end + +function (o::Optimizer)(nn::NeuralNetwork{<:GeneralizedHamiltonianArchitecture}, dl::ParametricDataLoader, batch::Batch{:FeedForward}, n_epochs::Int=1; kwargs...) + loss = ParametricLoss() + o(nn, dl, batch, n_epochs, loss; kwargs...) 
+end \ No newline at end of file diff --git a/src/loss/losses.jl b/src/loss/losses.jl index 93764f99..675f3c91 100644 --- a/src/loss/losses.jl +++ b/src/loss/losses.jl @@ -226,4 +226,10 @@ end function (loss::ReducedLoss)(model::Chain, params::NeuralNetworkParameters, input::CT, output::CT) where {CT <: QPTOAT} _compute_loss(loss.decoder(model(loss.encoder(input), params)), output) +end + +struct ParametricLoss <: NetworkLoss end + +function (loss::ParametricLoss)(model::Chain, params::NeuralNetworkParameters, input::CT, output::CT, system_parameters::NamedTuple) where {CT <: QPTOAT} + _compute_loss(model(input, system_parameters, params), output) end \ No newline at end of file diff --git a/test/data_loader/parametric_data_loader_test.jl b/test/data_loader/parametric_data_loader_test.jl new file mode 100644 index 00000000..572bb0bb --- /dev/null +++ b/test/data_loader/parametric_data_loader_test.jl @@ -0,0 +1,35 @@ +using GeometricMachineLearning +using GeometricMachineLearning: convert_input_and_batch_indices_to_array +using Test +using GeometricProblems.CoupledHarmonicOscillator: hodeensemble, default_parameters +using GeometricIntegrators: ImplicitMidpoint, integrate +using Random: seed! +seed!(123) + +function make_alternative_parameters_by_adding_constant(params::NamedTuple=default_parameters, a::Number=1.) + _keys = keys(params) + values = () + for key in _keys + values = (values..., params[key] .+ a) + end + NamedTuple{_keys}(values) +end + +alternative_parameters = make_alternative_parameters_by_adding_constant() + +h_ensemble = hodeensemble(; parameters = [default_parameters, alternative_parameters]) +sol = integrate(h_ensemble, ImplicitMidpoint()) +dl = ParametricDataLoader(sol) +batch = Batch(2) +batch_indices = batch(dl) + +function test_correct_parameter_splitting(n::Integer, parameters₁ = default_parameters, parameters₂ = alternative_parameters) + data_in_batch = convert_input_and_batch_indices_to_array(dl, batch, batch_indices[n]) + # the third output and the second datum in the batch + @test data_in_batch[3][1] == parameters₁ + @test data_in_batch[3][1] != parameters₂ + nothing +end + +test_correct_parameter_splitting(250, default_parameters, alternative_parameters) +test_correct_parameter_splitting(101, alternative_parameters, default_parameters) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d52f9dda..27304847 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -62,6 +62,7 @@ using Documenter: doctest @safetestset "Test the data loader in combination with optimization_step! " begin include("data_loader/data_loader_optimization_step.jl") end @safetestset "Optimizer functor with data loader for Adam " begin include("data_loader/optimizer_functor_with_adam.jl") end @safetestset "Test data loader for a tensor (q and p data) " begin include("data_loader/draw_batch_for_tensor_test.jl") end +@safetestset "Parametric DataLoader " begin include("data_loader/parametric_data_loader_test.jl") end @safetestset "Test NetworkLoss + Optimizer " begin include("network_losses/losses_and_optimization.jl") end diff --git a/test/training_phnn.jl b/test/training_phnn.jl new file mode 100644 index 00000000..afb1ba28 --- /dev/null +++ b/test/training_phnn.jl @@ -0,0 +1,30 @@ +using GeometricMachineLearning +using Test +using GeometricProblems.CoupledHarmonicOscillator: hodeensemble, default_parameters +using GeometricIntegrators: ImplicitMidpoint, integrate +using Random: seed! 
+seed!(123) + +function make_alternative_parameters_by_adding_constant(params::NamedTuple=default_parameters, n::Integer=1, a::Number=1.) + _keys = keys(params) + values = () + for (key, i) in zip(_keys, 1:length(_keys)) + values = i == n ? (values..., params[key] .+ a) : (values..., params[key]) + end + NamedTuple{_keys}(values) +end + +function make_alternative_parameters_by_adding_constant(params::NamedTuple, n::Integer, a_vals::Vector{<:Number}) + [make_alternative_parameters_by_adding_constant(params,n, a) for a ∈ a_vals] +end + +alternative_parameters = make_alternative_parameters_by_adding_constant(default_parameters, 1, Vector(.1:.1:10.)) + +h_ensemble = hodeensemble(; parameters = alternative_parameters) +sol = integrate(h_ensemble, ImplicitMidpoint()) +dl = ParametricDataLoader(sol) +batch = Batch(100) +arch = GeneralizedHamiltonianArchitecture(2; parameters = default_parameters) +nn = NeuralNetwork(arch) +o = Optimizer(AdamOptimizer(), nn) +o(nn, dl, batch) \ No newline at end of file From 0bea4d7fcc80c5bdb7c858cddf4eee0ef1a1cd31 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 30 Jul 2025 11:43:47 +0200 Subject: [PATCH 08/50] Added specific methods designed for dealing with PGHNNs. --- Project.toml | 2 + .../generalized_hamiltonian_neural_network.jl | 23 +++++++++-- src/loss/losses.jl | 2 +- src/pullbacks/zygote_pullback.jl | 2 + src/utils.jl | 40 ++++++++++++++++--- test/training_phnn.jl | 2 +- 6 files changed, 61 insertions(+), 10 deletions(-) diff --git a/Project.toml b/Project.toml index 4b99284f..1313f9b5 100644 --- a/Project.toml +++ b/Project.toml @@ -13,6 +13,7 @@ Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GeometricBase = "9a0b12b7-583b-4f04-aa1f-d8551b6addc9" GeometricEquations = "c85262ba-a08a-430a-b926-d29770767bf2" +GeometricProblems = "18cb22b4-ad41-5c80-9c5f-710df63fbdc9" GeometricSolutions = "7843afe4-64f4-4df4-9231-049495c56661" HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" @@ -44,6 +45,7 @@ Distances = "0.10" ForwardDiff = "0.10, 1" GeometricBase = "0.11" GeometricEquations = "0.19" +GeometricProblems = "0.6.12" GeometricSolutions = "0.5" HDF5 = "0.16, 0.17" KernelAbstractions = "0.9" diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index cfa829ad..bcc615c4 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -147,8 +147,13 @@ function concatenate_array_with_parameters(qp::AbstractVector, params::OptionalP vcat(qp, ParameterHandling.flatten(params)[1]) end -function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) - hcat((concatenate_array_with_parameters(qp[:, i], params) for i in axes(qp, 2))...) +# function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) +# hcat((concatenate_array_with_parameters(qp[:, i], params) for i in axes(qp, 2))...) +# end + +function concatenate_array_with_parameters(qp::AbstractMatrix, params::AbstractVector) + @assert _size(qp, 2) == length(params) + LazyArrays.Vcat((concatenate_array_with_parameters(@view(qp[:, i]), params[i]) for i in axes(params, 1))...) end function (integrator::SymplecticEulerA{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} @@ -234,6 +239,9 @@ end return Expr(:block, calls...) 
end +index_qpt(qp::QPT2{T, 2}, i, j) where {T} = (q = qp.q[i, j], p = qp.p[i, j]) +index_gpt(qp::QPT2{T, 3}, i, j, k) where {T} = (q = qp.q[i, j, k], p = qp.p[i, j, k]) + function Chain(ghnn_arch::GeneralizedHamiltonianArchitecture) c = () kinetic_energy = SymbolicKineticEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) @@ -251,6 +259,15 @@ function (nn::NeuralNetwork{GT})(qp::QPTOAT2, problem_params::OptionalParameters nn.model(qp, problem_params, params(nn)) end -function (model::Chain)(qp::QPTOAT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) +function (model::Chain)(qp::QPTOAT2, problem_params::OptionalParameters, params::Union{NeuralNetworkParameters, NamedTuple}) model((qp, problem_params), params) +end + +function (c::Chain)(qp::QPT2{T, 3}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters})::QPT2{T} where {T} + @assert size(qp.q, 3) == length(system_params) + @assert size(qp.q, 2) == 1 + output_vectorwise = [c(index_gpt(qp, :, 1, i), system_params[i], ps) for i in axes(system_params, 1)] + q_output = hcat([single_output_vectorwise.q for single_output_vectorwise ∈ output_vectorwise]...) + p_output = hcat([single_output_vectorwise.p for single_output_vectorwise ∈ output_vectorwise]...) + (q = reshape(q_output, size(q_output, 1), 1, size(q_output, 2)), p = reshape(p_output, size(p_output, 1), 1, size(p_output, 2))) end \ No newline at end of file diff --git a/src/loss/losses.jl b/src/loss/losses.jl index 675f3c91..e72b5218 100644 --- a/src/loss/losses.jl +++ b/src/loss/losses.jl @@ -230,6 +230,6 @@ end struct ParametricLoss <: NetworkLoss end -function (loss::ParametricLoss)(model::Chain, params::NeuralNetworkParameters, input::CT, output::CT, system_parameters::NamedTuple) where {CT <: QPTOAT} +function (loss::ParametricLoss)(model::Chain, params::Union{NamedTuple, NeuralNetworkParameters}, input::CT, output::CT, system_parameters::Union{NamedTuple, AbstractVector}) where {CT <: QPTOAT} _compute_loss(model(input, system_parameters, params), output) end \ No newline at end of file diff --git a/src/pullbacks/zygote_pullback.jl b/src/pullbacks/zygote_pullback.jl index e33e081d..fdd1362b 100644 --- a/src/pullbacks/zygote_pullback.jl +++ b/src/pullbacks/zygote_pullback.jl @@ -29,6 +29,8 @@ end (_pullback::ZygotePullback)(ps, model, input_nt::QPTOAT)::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt), ps) (_pullback::ZygotePullback)(ps, model, input_nt_output_nt::Tuple{<:QPTOAT, <:QPTOAT})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt_output_nt...), ps) +(_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:NamedTuple})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_and_parameters...), ps) +(_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:AbstractVector})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_and_parameters...), ps) """ _processing(returned_pullback) diff --git a/src/utils.jl b/src/utils.jl index 9cf654ea..78c068ef 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -153,7 +153,7 @@ qp = (q = [1, 2], p = [3, 4]) """ const QPT{T} = NamedTuple{(:q, :p), Tuple{AT, AT}} where {T, N, AT <: AbstractArray{T, N}} -const QPT2{T} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: AbstractArray{T, N}} +const QPT2{T, N} = NamedTuple{(:q, :p), Tuple{AT₁, AT₂}} where {T, N, AT₁ <: AbstractArray{T, N}, AT₂ <: 
AbstractArray{T, N}} @doc raw""" QPTOAT A union of two types: ```julia const QPTOAT = Union{QPT, AbstractArray} ``` This could be data in ``(q, p)\in\mathbb{R}^{2d}`` form or come from an arbitrary vector space. """ -const QPTOAT{T} = Union{QPT{T}, AbstractArray{T}} where T +const QPTOAT{T} = Union{QPT{T}, AbstractArray{T}} where {T} -const QPTOAT2{T} = Union{QPT2{T}, AbstractArray{T}} where T +const QPTOAT2{T, N} = Union{QPT2{T, N}, AbstractArray{T, N}} where {T, N} Base.:≈(qp₁::QPT, qp₂::QPT) = (qp₁.q ≈ qp₂.q) & (qp₁.p ≈ qp₂.p) _eltype(x) = eltype(x) -_eltype(ps::NamedTuple) = _eltype(ps[1]) -_eltype(ps::Tuple) = _eltype(ps[1]) \ No newline at end of file +_eltype(::NTuple{N, FT}) where {N, FT <: AbstractFloat} = FT +_eltype(nt::NamedTuple) = _eltype(values(nt)) + +_size(x) = size(x) +function _size(qp::QPT) + q_size = _size(qp.q) + p_size = _size(qp.p) + @assert q_size == p_size + (2q_size[1], q_size[2:end]...) +end + +_size(x, a::Integer) = size(x, a) +function _size(qp::QPT, a::Integer) + q_size = _size(qp.q, a) + p_size = _size(qp.p, a) + @assert q_size == p_size + a == 1 ? 2q_size : q_size +end + +function _eltype(parameters::Vector{<:NamedTuple}) + types = DataType[] + for v ∈ parameters + push!(types, _eltype(v)) + end + T = types[1] + for T₂ ∈ types + T == T₂ || error("There are different types in the NamedTuples!") + end + T +end + +_eltype(ps::Tuple) = _eltype(ps[1]) \ No newline at end of file diff --git a/test/training_phnn.jl b/test/training_phnn.jl index afb1ba28..2891f1ea 100644 --- a/test/training_phnn.jl +++ b/test/training_phnn.jl @@ -24,7 +24,7 @@ h_ensemble = hodeensemble(; parameters = alternative_parameters) sol = integrate(h_ensemble, ImplicitMidpoint()) dl = ParametricDataLoader(sol) batch = Batch(100) -arch = GeneralizedHamiltonianArchitecture(2; parameters = default_parameters) +arch = GeneralizedHamiltonianArchitecture(4; parameters = default_parameters) nn = NeuralNetwork(arch) o = Optimizer(AdamOptimizer(), nn) o(nn, dl, batch) \ No newline at end of file From 990d2a479eb202cdb659f9728ead098b3cdf601a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 30 Jul 2025 18:51:01 +0200 Subject: [PATCH 09/50] Fixed compilation problems with training pghnns. --- src/data_loader/parametric_data_loader.jl | 2 +- src/optimizers/optimizer.jl | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/data_loader/parametric_data_loader.jl b/src/data_loader/parametric_data_loader.jl index cb49092e..795761fe 100644 --- a/src/data_loader/parametric_data_loader.jl +++ b/src/data_loader/parametric_data_loader.jl @@ -81,7 +81,7 @@ function optimize_for_one_epoch!( opt::Optimizer, count += 1 # these `copy`s should not be necessary! coming from a Zygote problem!
_input_nt_output_nt_parameter_indices = convert_input_and_batch_indices_to_array(dl, batch, batch_indices) - input_nt_output_nt = _input_nt_output_nt_parameter_indices[1:2] + # input_nt_output_nt = _input_nt_output_nt_parameter_indices[1:2] loss_value, pullback = _pullback(ps, model, _input_nt_output_nt_parameter_indices) total_error += loss_value dp = _get_params(_unpack_tuple(pullback(one(loss_value)))) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 1395ce42..5b9f7064 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -86,7 +86,7 @@ end ####################################################################################### # optimization step function -function _optimization_step!(o::Optimizer, λY::NamedTuple, ps::NamedTuple, cache::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) +function _optimization_step!(o::Optimizer, λY::NamedTuple, ps::Union{NeuralNetworkParameters, NamedTuple}, cache::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) gx = rgrad(ps, dx) B = global_rep(λY, gx) update!(o, cache, B) @@ -162,6 +162,10 @@ end rgrad(ps::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) = apply_toNT(rgrad, ps, dx) +function rgrad(ps::NeuralNetworkParameters, dp::NamedTuple) + rgrad(NamedTuple{keys(ps)}(values(ps)), _get_params(dp)) +end + function rgrad(Y::AbstractVecOrMat, dx::AbstractVecOrMat) @assert size(Y) == size(dx) dx From 990d2a479eb202cdb659f9728ead098b3cdf601a Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 31 Jul 2025 07:53:01 +0200 Subject: [PATCH 10/50] Added missing docstrings. --- docs/src/architectures/hamiltonian_neural_network.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/architectures/hamiltonian_neural_network.md b/docs/src/architectures/hamiltonian_neural_network.md index 12380ef0..86f86387 100644 --- a/docs/src/architectures/hamiltonian_neural_network.md +++ b/docs/src/architectures/hamiltonian_neural_network.md @@ -53,5 +53,7 @@ GeometricMachineLearning.SymbolicPotentialEnergy GeometricMachineLearning.SymbolicKineticEnergy GeometricMachineLearning.build_gradient GeometricMachineLearning.GeneralizedHamiltonianArchitecture +GeometricMachineLearning.ForcingLayer +GeometricMachineLearning.ParametricDataLoader GeometricMachineLearning._processing ``` \ No newline at end of file From 0e27eec4ef33832ed5f3e3addc1244d05fa46b92 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 4 Aug 2025 19:09:04 +0200 Subject: [PATCH 11/50] Removed ambiguity by pulling the definition of the pullback inside the body of the function. --- src/data_loader/parametric_data_loader.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data_loader/parametric_data_loader.jl b/src/data_loader/parametric_data_loader.jl index 795761fe..2c03049c 100644 --- a/src/data_loader/parametric_data_loader.jl +++ b/src/data_loader/parametric_data_loader.jl @@ -134,7 +134,7 @@ function (batch::Batch)(dl::ParametricDataLoader) batch_over_two_axes(batch, dl.input_time_steps - (batch.seq_length - 1) - batch.prediction_window, dl.n_params, number_of_batches(dl, batch)) end -function (o::Optimizer)(nn::NeuralNetwork{<:GeneralizedHamiltonianArchitecture}, dl::ParametricDataLoader, batch::Batch{:FeedForward}, n_epochs::Int=1; kwargs...) - loss = ParametricLoss() - o(nn, dl, batch, n_epochs, loss; kwargs...) 
+function (o::Optimizer)(nn::NeuralNetwork{<:GeneralizedHamiltonianArchitecture}, dl::ParametricDataLoader, batch::Batch{:FeedForward}, n_epochs::Integer=1, loss::NetworkLoss=ParametricLoss(); kwargs...) + _pullback::AbstractPullback = ZygotePullback(loss) + o(nn, dl, batch, n_epochs, loss, _pullback; kwargs...) end \ No newline at end of file From 3cc7ef266b96021c4ebd905220b605cb1329f08b Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 4 Aug 2025 19:09:47 +0200 Subject: [PATCH 12/50] Tried to improve performance by allocating closures explicitly. --- src/pullbacks/zygote_pullback.jl | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/pullbacks/zygote_pullback.jl b/src/pullbacks/zygote_pullback.jl index fdd1362b..30776722 100644 --- a/src/pullbacks/zygote_pullback.jl +++ b/src/pullbacks/zygote_pullback.jl @@ -27,10 +27,22 @@ struct ZygotePullback{NNLT} <: AbstractPullback{NNLT} loss::NNLT end -(_pullback::ZygotePullback)(ps, model, input_nt::QPTOAT)::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt), ps) -(_pullback::ZygotePullback)(ps, model, input_nt_output_nt::Tuple{<:QPTOAT, <:QPTOAT})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_nt_output_nt...), ps) -(_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:NamedTuple})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_and_parameters...), ps) -(_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:AbstractVector})::Tuple = Zygote.pullback(ps -> _pullback.loss(model, ps, input_and_parameters...), ps) +function (_pullback::ZygotePullback)(ps, model, input_nt::QPTOAT)::Tuple + closure = ps -> _pullback.loss(model, ps, input_nt) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_nt_output_nt::Tuple{<:QPTOAT, <:QPTOAT})::Tuple + closure = ps -> _pullback.loss(model, ps, input_nt_output_nt...) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:NamedTuple})::Tuple + closure = ps -> _pullback.loss(model, ps, input_and_parameters...) + Zygote.pullback(closure, ps) +end +function (_pullback::ZygotePullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:AbstractVector})::Tuple + closure = ps -> _pullback.loss(model, ps, input_and_parameters...) + Zygote.pullback(closure, ps) +end """ _processing(returned_pullback) From 521fd17869b8eea60adf3575dd602f5facec31a7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 4 Aug 2025 19:10:24 +0200 Subject: [PATCH 13/50] Added script for training a TimeDependentHarmonicOscillator. 
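The script below builds its training set by hand, so for reference, here is a minimal, hypothetical sketch of the data layout that `ParametricDataLoader` checks for: the ``(q, p)`` tensors carry the parameter instances along their third axis, and one parameter `NamedTuple` is supplied per slice (the sizes and the `t` values below are made up for illustration, not taken from the script).

```julia
using GeometricMachineLearning

# toy data of shape (dim ÷ 2, time steps, parameter axis); data and parameter eltypes must match
q = rand(1, 2, 3); p = rand(1, 2, 3)
params = [(t = 0.0,), (t = 0.1,), (t = 0.2,)]   # one NamedTuple per third-axis slice
dl = ParametricDataLoader((q = q, p = p), params)
```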
--- ...imeDependentHarmonicOscillator_Analytic.jl | 137 ++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 scripts/TimeDependentHarmonicOscillator_Analytic.jl diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl new file mode 100644 index 00000000..8811c9d0 --- /dev/null +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -0,0 +1,137 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2 +using CairoMakie + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = 1.0 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π +nt = 100 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +""" +Turn a vector of numbers into a vector of `NamedTuple`s to be used by `ParametricDataLoader`. +""" +function turn_parameters_into_correct_format(t::AbstractVector, IC::AbstractVector{<:NamedTuple}) + vec_of_params = NamedTuple[] + for time_step ∈ t + time_step == t[end] || push!(vec_of_params, (t = time_step, )) + end + vcat((vec_of_params for _ in axes(IC, 1))...) +end + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] 
+p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +""" +This takes time as a single additional parameter (third axis). +""" +function load_time_dependent_harmonic_oscillator_with_parametric_data_loader(qp::QPT{T}, t::AbstractVector{T}, IC::AbstractVector) where {T} + qp_reformatted = turn_q_p_data_into_correct_format(qp) + t_reformatted = turn_parameters_into_correct_format(t, IC) + ParametricDataLoader(qp_reformatted, t_reformatted) +end + +# This sets up the data loader +dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) + +# This sets up the neural network +width::Int = 4 +nhidden::Int = 4 +arch = GeneralizedHamiltonianArchitecture(2; parameters = turn_parameters_into_correct_format(t, IC)[1]) +nn = NeuralNetwork(arch) + +# This is where training starts +batch = Batch(1000) +o = Optimizer(AdamOptimizer(), nn) + +n_epochs = 100 +o(nn, dl, batch, n_epochs) + +# Testing the network +initial_conditions = (q = [-1.], p = [-0.11]) +n_steps = 100 +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +# note that we have to supply the parameters as a named tuple as well here: +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t_step,), nn.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[50,:]; label="analytic") \ No newline at end of file From fe26451833e9703452493ad8198f957e7a8f166d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 4 Aug 2025 19:46:40 +0200 Subject: [PATCH 14/50] Changed some parameters (including batch size). 
--- .../TimeDependentHarmonicOscillator_Analytic.jl | 17 +++++++++-------- .../generalized_hamiltonian_neural_network.jl | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl index 8811c9d0..34eaa79e 100644 --- a/scripts/TimeDependentHarmonicOscillator_Analytic.jl +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -7,7 +7,7 @@ using CairoMakie omega = 1.0 # natural frequency of the harmonic Oscillator Omega = 3.5 # frequency of the external sinusoidal forcing F = 1.0 # amplitude of the external sinusoidal forcing -ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +ni_dim = 2 # number of initial conditions per dimension (so ni_dim^2 total) T = 2π nt = 100 # number of time steps dt = T/nt # time step @@ -106,20 +106,21 @@ end dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) # This sets up the neural network -width::Int = 4 -nhidden::Int = 4 +width::Int = 8 +nhidden::Int = 20 arch = GeneralizedHamiltonianArchitecture(2; parameters = turn_parameters_into_correct_format(t, IC)[1]) nn = NeuralNetwork(arch) # This is where training starts -batch = Batch(1000) +batch_size = 5 +batch = Batch(batch_size) o = Optimizer(AdamOptimizer(), nn) -n_epochs = 100 -o(nn, dl, batch, n_epochs) +n_epochs = 1000 +loss_array = o(nn, dl, batch, n_epochs) # Testing the network -initial_conditions = (q = [-1.], p = [-0.11]) +initial_conditions = (q = [-1.], p = [-1]) n_steps = 100 trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) trajectory.q[:, 1] .= initial_conditions.q @@ -134,4 +135,4 @@ end fig = Figure() ax = Axis(fig[1,1]) lines!(ax, trajectory.q[1,:]; label="nn") -lines!(ax, q[50,:]; label="analytic") \ No newline at end of file +lines!(ax, q[1,:]; label="analytic") \ No newline at end of file diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index bcc615c4..4466ef4e 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -69,7 +69,7 @@ function Chain(se::SymbolicEnergy) end function SymbolicNeuralNetworks.Jacobian(f::SymbolicNeuralNetworks.EqT, nn::AbstractSymbolicNeuralNetwork, dim2::Integer) - # make differential + # make differential of input variables (not of parameters) Dx = SymbolicNeuralNetworks.symbolic_differentials(nn.input)[1:dim2] # Evaluation of gradient From 179ca6de85c80b40fe3c341f8e2a2142ed361141 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 8 Aug 2025 17:00:16 +0200 Subject: [PATCH 15/50] Made some changes to parameters. 
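The training code is now wrapped in a small helper, `train_network(batch_size, method, n_epochs)` (defined in the diff below), so different batch sizes and optimizers can be tried quickly. A hedged usage sketch, with placeholder values rather than the ones used in the script:

```julia
# assumes the script has already set up `nn` and `dl`; the numbers are placeholders
loss_array = train_network(10, AdamOptimizer(), 100)
```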
--- ...imeDependentHarmonicOscillator_Analytic.jl | 38 +++++++++++-------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl index 34eaa79e..1f10add7 100644 --- a/scripts/TimeDependentHarmonicOscillator_Analytic.jl +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -6,10 +6,10 @@ using CairoMakie # PARAMETERS omega = 1.0 # natural frequency of the harmonic Oscillator Omega = 3.5 # frequency of the external sinusoidal forcing -F = 1.0 # amplitude of the external sinusoidal forcing -ni_dim = 2 # number of initial conditions per dimension (so ni_dim^2 total) +F = .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) T = 2π -nt = 100 # number of time steps +nt = 500 # number of time steps dt = T/nt # time step # Generating the initial condition array @@ -36,6 +36,8 @@ for i in 1:nt+1 for j=1:ni q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) end end @@ -106,33 +108,37 @@ end dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) # This sets up the neural network -width::Int = 8 -nhidden::Int = 20 -arch = GeneralizedHamiltonianArchitecture(2; parameters = turn_parameters_into_correct_format(t, IC)[1]) +width::Int = 2 +nhidden::Int = 1 +n_integrators::Int = 3 +arch = GeneralizedHamiltonianArchitecture(2; width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) nn = NeuralNetwork(arch) # This is where training starts -batch_size = 5 -batch = Batch(batch_size) -o = Optimizer(AdamOptimizer(), nn) +function train_network(batch_size::Integer=10, method=AdamOptimizer(), n_epochs=10) + batch = Batch(batch_size) + o = Optimizer(method, nn) + o(nn, dl, batch, n_epochs) +end + +loss_array = train_network() -n_epochs = 1000 -loss_array = o(nn, dl, batch, n_epochs) +trajectory_number = 20 # Testing the network -initial_conditions = (q = [-1.], p = [-1]) -n_steps = 100 +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) trajectory.q[:, 1] .= initial_conditions.q trajectory.p[:, 1] .= initial_conditions.p # note that we have to supply the parameters as a named tuple as well here: for t_step ∈ 0:(n_steps-2) - qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t_step,), nn.params) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn.params) trajectory.q[:, t_step+2] .= qp_temporary.q - trajectory.p[:, t_step+2] .= qp_temporary.p + trajectory.p[:, t_step+2] .= qp_temporary.p end fig = Figure() ax = Axis(fig[1,1]) lines!(ax, trajectory.q[1,:]; label="nn") -lines!(ax, q[1,:]; label="analytic") \ No newline at end of file +lines!(ax, q[trajectory_number,:]; label="analytic") \ No 
newline at end of file From 3adf8a40ba35146b389c42cef9018d7057b87164 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 10 Aug 2025 17:52:39 +0200 Subject: [PATCH 16/50] using relu now. --- scripts/TimeDependentHarmonicOscillator_Analytic.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl index 1f10add7..ca234037 100644 --- a/scripts/TimeDependentHarmonicOscillator_Analytic.jl +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -1,7 +1,8 @@ using HDF5 using GeometricMachineLearning -using GeometricMachineLearning: QPT, QPT2 +using GeometricMachineLearning: QPT, QPT2, Activation using CairoMakie +using NNlib: relu # PARAMETERS omega = 1.0 # natural frequency of the harmonic Oscillator @@ -110,8 +111,8 @@ dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, # This sets up the neural network width::Int = 2 nhidden::Int = 1 -n_integrators::Int = 3 -arch = GeneralizedHamiltonianArchitecture(2; width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) +n_integrators::Int = 1 +arch = GeneralizedHamiltonianArchitecture(2; activation = relu, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) nn = NeuralNetwork(arch) # This is where training starts From d519326ce63bac463ad0628a6fa6c41fae7658f0 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 10 Aug 2025 17:53:58 +0200 Subject: [PATCH 17/50] One does not have to specify an AbstractNeuralNetworks.Activation to make this work now. --- src/architectures/generalized_hamiltonian_neural_network.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index 4466ef4e..98cc0bce 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -11,7 +11,7 @@ struct SymbolicEnergy{AT <: Activation, PT, Kinetic} parameter_convert::PT activation::AT - function SymbolicEnergy(dim, width=dim, nhidden=HNN_nhidden_default, activation::Activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), type) + function SymbolicEnergy(dim, width, nhidden, activation::Activation; parameters::OptionalParameters=NullParameters(), type) @assert iseven(dim) "The input dimension must be an even integer!" flattened_parameters = ParameterHandling.flatten(parameters) parameter_length = length(flattened_parameters[1]) @@ -227,6 +227,7 @@ struct GeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: Hamil activation::AT function GeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters()) + activation = (typeof(activation) <: Activation) ? activation : Activation(activation) new{typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) end end From 2bded0b61931ac8a9abd236750d6337c1eb71614 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 10 Aug 2025 17:55:06 +0200 Subject: [PATCH 18/50] Fixed some issues that previously prohibited learning with a relu activation. I also removed the ubiquitous warning messages that were observable before.
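The root cause was presumably that with relu some parameter blocks do not influence the loss for a given batch, in which case Zygote returns `nothing` instead of a gradient, and `rgrad` had no method for that. The new fallbacks (see the diff below) treat such `nothing` cotangents as zero gradients, and `_get_params_without_warning` unpacks single-entry `(params = ...,)` named tuples directly, which appears to be what silenced the repeated warnings. A hedged sketch of the new dispatch:

```julia
using GeometricMachineLearning: rgrad   # internal function touched by this patch

rgrad(rand(3, 3), nothing)          # now dispatches to rgrad(a, zero(a))
rgrad((W = rand(2, 2),), nothing)   # NamedTuple variant zeroes every leaf
```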
--- src/optimizers/optimizer.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 5b9f7064..9231d609 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -162,8 +162,15 @@ end rgrad(ps::NamedTuple, dx::Union{NamedTuple, NeuralNetworkParameters}) = apply_toNT(rgrad, ps, dx) +rgrad(a::AbstractArray, ::Nothing) = rgrad(a, zero(a)) + +rgrad(nt::NamedTuple, ::Nothing) = rgrad(nt, apply_toNT(zero, nt)) + +_get_params_without_warning(nt) = _get_params(nt) +_get_params_without_warning(nt::NamedTuple{(:params,), Tuple{AT}}) where {AT <: Any} = nt.params + function rgrad(ps::NeuralNetworkParameters, dp::NamedTuple) - rgrad(NamedTuple{keys(ps)}(values(ps)), _get_params(dp)) + rgrad(NamedTuple{keys(ps)}(values(ps)), _get_params_without_warning(dp)) end function rgrad(Y::AbstractVecOrMat, dx::AbstractVecOrMat) From 42cd54fbb5664870af73be19152e8f6cdc8cda80 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 11 Aug 2025 11:22:36 +0200 Subject: [PATCH 19/50] Implemented forced Hamiltonian systems (with forcing in the p coordinates). --- src/GeometricMachineLearning.jl | 2 + src/architectures/forced_sympnet.jl | 63 ++++++++++++ src/layers/forcing_dissipation_layers.jl | 124 ++++++++++++++++++++++- src/layers/sympnets.jl | 12 +-- 4 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 src/architectures/forced_sympnet.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index a318a1db..bb23be81 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -314,6 +314,8 @@ module GeometricMachineLearning include("data_loader/parametric_data_loader.jl") + include("architectures/forced_sympnet.jl") + export ForcedSympNet export default_optimizer diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl new file mode 100644 index 00000000..a80bcd9e --- /dev/null +++ b/src/architectures/forced_sympnet.jl @@ -0,0 +1,63 @@ +@doc raw""" + ForcedSympNet <: NeuralNetworkIntegrator + +`ForcedSympNet`s are based on [`SympNet`](@ref)s [jin2020sympnets](@cite) and include [`ForcingLayer`](@ref)s. They are based on [`GSympNet`](@ref)s. + +!!! warn + For the moment only ``p`` forcing is implemented (i.e. we only use [`ForcingLayerP`](@ref)). + +# Constructor + +```julia +ForcedSympNet(d) +``` + +Make a forced SympNet with dimension ``d.`` + +# Arguments + +Keyword arguments are: +- `upscaling_dimension::Int = 2d`: The *upscaling dimension* of the gradient layer. See the documentation for [`GradientLayerQ`](@ref) and [`GradientLayerP`](@ref) for further explanation. +- `n_layers::Int""" * "$(g_n_layers_default)`" * raw""": The number of layers (i.e. the total number of [`GradientLayerQ`](@ref) and [`GradientLayerP`](@ref)). +- `activation""" * "$(g_activation_default)`" * raw""": The activation function that is applied. +- `init_upper::Bool""" * "$(g_init_upper_default)`" * raw""": Initialize the gradient layer so that it first modifies the $q$-component. 
+""" +struct ForcedSympNet{AT} <: NeuralNetworkIntegrator + dim::Int + upscaling_dimension::Int + n_layers::Int + act::AT + init_upper::Bool + + function ForcedSympNet(dim::Integer; + upscaling_dimension = 2 * dim, + n_layers = g_n_layers_default, + activation = g_activation_default, + init_upper = g_init_upper_default) + new{typeof(activation)}(dim, upscaling_dimension, n_layers, activation, init_upper) + end + + function ForcedSympNet(dl::DataLoader; + upscaling_dimension = 2 * dl.input_dim, + n_layers = g_n_layers_default, + activation = g_activation_default, + init_upper = g_init_upper_default) + new{typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, activation, init_upper) + end +end + +function Chain(arch::ForcedSympNet) + layers = () + is_upper_criterion = arch.init_upper ? isodd : iseven + for i in 1:arch.n_layers + layers = (layers..., + if is_upper_criterion(i) + GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act) + else + GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act) + end + ) + layers = (layers..., ForcingLayerP(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false)) + end + Chain(layers...) +end \ No newline at end of file diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index c2d31928..f9348e14 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -1,6 +1,126 @@ @doc raw""" ForcingLayer <: AbstractExplicitLayer -An abstract type that summarizes layers that can learn dissipative terms, but not conservative ones. +Layers that can learn dissipative or forcing terms, but not conservative ones. + +Use the constructors [`ForcingLayerQ`](@ref) and [`ForcingLayerP`](@ref) for this. """ -abstract type ForcingLayer{M, N} <: AbstractExplicitLayer{M, N} end \ No newline at end of file +struct ForcingLayer{M, N, PT<:Base.Callable, CT, type, ReturnParameters} <: AbstractExplicitLayer{M, N} + dim::Int + width::Int + nhidden::Int + parameter_length::Int + parameter_convert::PT + model::CT +end + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::ForcingLayer, backend::KernelAbstractions.Backend, ::Type{T}) where {T} + initialparameters(rng, init_weight, integrator.model, backend, T) +end + +""" + ForcingLayerQ + +A layer that is derived from the more general [`ForcingLayer`](@ref) and only changes the ``q`` component. +""" +const ForcingLayerQ{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :Q, ReturnParameters} + +""" + ForcingLayerP + +A layer that is derived from the more general [`ForcingLayer`](@ref) and only changes the ``p`` component. 
+""" +const ForcingLayerP{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :P, ReturnParameters} + +function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_length::Integer, activation) + inner_layers = Tuple( + [Dense(width, width, activation) for _ in 1:nhidden] + ) + + Chain( + Dense(dim÷2 + parameter_length, width, activation), + inner_layers..., + Linear(width, 1; use_bias = false) + ) +end + +function ForcingLayer(dim::Integer, width::Integer, nhidden::Integer, activation; parameters::OptionalParameters, return_parameters::Bool, type::Symbol) + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + c = build_chain(dim, width, nhidden, parameter_length, activation) + ForcingLayer{dim, dim, typeof(flattened_parameters[2]), typeof(c), type, return_parameters}(dim, width, nhidden, parameter_length, flattened_parameters[2], c) +end + +""" + ForcingLayerQ(dim) + +# Examples + +```julia +ForcingLayerQ(dim, width, nhidden, activation; parameters, return_parameters) +``` +""" +function ForcingLayerQ(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:Q) +end + +""" + ForcingLayerP(dim) + +See [`ForcingLayerQ`](@ref). + +# Examples + +```julia +ForcingLayerP(dim, width, nhidden, activation; parameters, return_parameters) +``` +""" +function ForcingLayerP(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:P) +end + +function (integrator::ForcingLayerQ{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + (q = qp.q + integrator.model(input, params), p = qp.p) +end + +function (integrator::ForcingLayerP{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + (q = qp.q, p = qp.p + integrator.model(input, params)) +end + +function (integrator::ForcingLayerQ{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.q, problem_params) + ((q = qp.q + integrator.model(input, params), p = qp.p), problem_params) +end + +function (integrator::ForcingLayerP{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(qp.p, problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) +end + +function (integrator::ForcingLayer)(qp_params::Tuple{<:QPTOAT2, <:OptionalParameters}, params::NeuralNetworkParameters) + integrator(qp_params..., params) +end + +function (integrator::ForcingLayer)(::TT, ::NeuralNetworkParameters) where {TT <: Tuple} + error("The input is of type $(TT). 
This shouldn't be the case!") +end + +function (integrator::ForcingLayer{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params)[1] + (vcat(evaluated.q, evaluated.p), problem_params) +end + +function (integrator::ForcingLayer{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) + evaluated = integrator(qp_split, problem_params, params) + vcat(evaluated.q, evaluated.p) +end + +(integrator::ForcingLayer)(qp::QPTOAT2, params::NeuralNetworkParameters) = integrator(qp, NullParameters(), params) \ No newline at end of file diff --git a/src/layers/sympnets.jl b/src/layers/sympnets.jl index ba6e2c15..6187b7b2 100644 --- a/src/layers/sympnets.jl +++ b/src/layers/sympnets.jl @@ -236,36 +236,36 @@ function custom_vec_mul(scale::AbstractVector{T}, x::AbstractArray{T, 3}) where vec_tensor_mul(scale, x) end -@inline function (d::ActivationLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::ActivationLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") return (q = x.q + custom_vec_mul(ps.scale, d.activation.(x.p)), p = x.p) end -@inline function (d::ActivationLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::ActivationLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") return (q = x.q, p = x.p + custom_vec_mul(ps.scale, d.activation.(x.q))) end -@inline function (d::GradientLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function (d::GradientLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q + custom_mat_mul(ps.weight', (custom_vec_mul(ps.scale, d.activation.(custom_mat_mul(ps.weight, x.p) .+ ps.bias)))), p = x.p) end -@inline function(d::GradientLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::GradientLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q, p = x.p + custom_mat_mul(ps.weight', (custom_vec_mul(ps.scale, d.activation.(custom_mat_mul(ps.weight, x.q) .+ ps.bias))))) end -@inline function(d::LinearLayerQ{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::LinearLayerQ{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q + custom_mat_mul(ps.weight, x.p), p = x.p) end -@inline function(d::LinearLayerP{M, M})(x::NamedTuple, ps) where {M} +@inline function(d::LinearLayerP{M, M})(x::QPT2, ps) where {M} size(x.q, 1) == M÷2 || error("Dimension mismatch.") (q = x.q, p = x.p + custom_mat_mul(ps.weight, x.q)) end From 3a2f7294f8bde50072e4ee543cb439a9e0830976 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 11 Aug 2025 11:22:57 +0200 Subject: [PATCH 20/50] Added script for ForcedHamiltonianSystem. 
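For context: the `ForcingLayerP` layers that `ForcedSympNet` inserts only touch the momenta, roughly ``(q, p) \mapsto (q, p + f_\theta([p; \mu]))``, with ``f_\theta`` the small dense network from `build_chain` and ``\mu`` the flattened problem parameters (empty when no problem parameters are passed, as in this script). A hedged sketch of the network the script sets up (width, depth and activation mirror the script; the call is illustrative, not canonical):

```julia
using GeometricMachineLearning
using NNlib: relu

# alternating GradientLayerQ / GradientLayerP blocks, each followed by a ForcingLayerP
arch = ForcedSympNet(2; upscaling_dimension = 10, n_layers = 4, activation = relu)
nn = NeuralNetwork(arch)
```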
--- scripts/ForcedHamiltonianSystem.jl | 126 +++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 scripts/ForcedHamiltonianSystem.jl diff --git a/scripts/ForcedHamiltonianSystem.jl b/scripts/ForcedHamiltonianSystem.jl new file mode 100644 index 00000000..ecfaa7de --- /dev/null +++ b/scripts/ForcedHamiltonianSystem.jl @@ -0,0 +1,126 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2 +using CairoMakie +using NNlib: relu + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π +nt = 500 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] 
+p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +# This sets up the data loader +dl = DataLoader(turn_q_p_data_into_correct_format((q = q, p = p))) + +# This sets up the neural network +width::Int = 10 +nhidden::Int = 4 +activation = relu +arch = ForcedSympNet(2; upscaling_dimension = width, + n_layers = nhidden, + activation = activation) +nn = NeuralNetwork(arch) + +# This is where training starts +function train_network(batch_size::Integer=5000, method=AdamOptimizer(), n_epochs=1000) + batch = Batch(batch_size) + o = Optimizer(method, nn) + o(nn, dl, batch, n_epochs) +end + +loss_array = train_network() + +trajectory_number = 20 + +# Testing the network +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), nn.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[trajectory_number,:]; label="analytic") \ No newline at end of file From 27b944e8d25e99ccb8ce8f7d313efe9e0599a4dd Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 11 Aug 2025 11:24:08 +0200 Subject: [PATCH 21/50] Renamed Last to ReturnParameters. 
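The flag now says what it does: integrator layers built with `return_parameters = true` hand the problem parameters on together with their output, so the next layer receives the tuple `(qp′, μ)`, while the last layer of the chain is built with `return_parameters = false` and returns only `qp′`. A hedged construction sketch (the energies mirror the PGHNN test added later in this series; `SymbolicKineticEnergy` is assumed to take the same arguments as `SymbolicPotentialEnergy`):

```julia
using GeometricMachineLearning
using GeometricMachineLearning: SymbolicPotentialEnergy, SymbolicKineticEnergy,
                                SymplecticEulerA, SymplecticEulerB

params = (m = 1., ω = π / 2)
K = SymbolicKineticEnergy(2, 2, 1, tanh; parameters = params)
V = SymbolicPotentialEnergy(2, 2, 1, tanh; parameters = params)

layer_mid  = SymplecticEulerA(K; return_parameters = true)    # passes (qp′, μ) on
layer_last = SymplecticEulerB(V; return_parameters = false)   # returns only qp′
```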
--- .../generalized_hamiltonian_neural_network.jl | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index 98cc0bce..5d74c401 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -119,7 +119,7 @@ function build_gradient(se::SymbolicEnergy) SymbolicNeuralNetworks.build_nn_function(SymbolicNeuralNetworks.derivative(□)', nn.params, nn.input) end -struct SymplecticEuler{M, N, FT<:Base.Callable, MT<:Chain, type, Last} <: AbstractExplicitLayer{M, N} +struct SymplecticEuler{M, N, FT<:Base.Callable, MT<:Chain, type, ReturnParameters} <: AbstractExplicitLayer{M, N} gradient_function::FT energy_model::MT end @@ -128,19 +128,19 @@ function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralN initialparameters(rng, init_weight, integrator.energy_model, backend, T) end -const SymplecticEulerA{M, N, FT, AT, Last} = SymplecticEuler{M, N, FT, AT, :A} -const SymplecticEulerB{M, N, FT, AT, Last} = SymplecticEuler{M, N, FT, AT, :B} +const SymplecticEulerA{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :A, ReturnParameters} +const SymplecticEulerB{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :B, ReturnParameters} -function SymplecticEulerA(se::SymbolicKineticEnergy; last_step::Bool=false) +function SymplecticEulerA(se::SymbolicKineticEnergy; return_parameters::Bool) gradient_function = build_gradient(se) c = Chain(se) - SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :A, last_step}(gradient_function, c) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :A, return_parameters}(gradient_function, c) end -function SymplecticEulerB(se::SymbolicPotentialEnergy; last_step::Bool=false) +function SymplecticEulerB(se::SymbolicPotentialEnergy; return_parameters::Bool) gradient_function = build_gradient(se) c = Chain(se) - SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :B, last_step}(gradient_function, c) + SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :B, return_parameters}(gradient_function, c) end function concatenate_array_with_parameters(qp::AbstractVector, params::OptionalParameters) @@ -156,22 +156,24 @@ function concatenate_array_with_parameters(qp::AbstractMatrix, params::AbstractV LazyArrays.Vcat((concatenate_array_with_parameters(@view(qp[:, i]), params[i]) for i in axes(params, 1))...) 
end -function (integrator::SymplecticEulerA{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} +concatenate_array_with_parameters(a::AbstractArray{T, 3}, ::GeometricBase.NullParameters) where {T} = a + +function (integrator::SymplecticEulerA{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.p, problem_params) (q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p) end -function (integrator::SymplecticEulerB{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} +function (integrator::SymplecticEulerB{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) (q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])) end -function (integrator::SymplecticEulerA{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} +function (integrator::SymplecticEulerA{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.p, problem_params) ((q = @view((qp.q + integrator.gradient_function(input, params))[:, 1]), p = qp.p), problem_params) end -function (integrator::SymplecticEulerB{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} +function (integrator::SymplecticEulerB{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) ((q = qp.q, p = @view((qp.p - integrator.gradient_function(input, params))[:, 1])), problem_params) end @@ -184,7 +186,7 @@ function (integrator::SymplecticEuler)(::TT, ::NeuralNetworkParameters) where {T error("The input is of type $(TT). 
This shouldn't be the case!") end -function (integrator::SymplecticEuler{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} +function (integrator::SymplecticEuler{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} @assert iseven(size(qp, 1)) n = size(qp, 1)÷2 qp_split = assign_q_and_p(qp, n) @@ -192,7 +194,7 @@ function (integrator::SymplecticEuler{M, N, FT, AT, Type, false})(qp::AbstractAr (vcat(evaluated.q, evaluated.p), problem_params) end -function (integrator::SymplecticEuler{M, N, FT, AT, Type, true})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} +function (integrator::SymplecticEuler{M, N, FT, AT, Type, false})(qp::AbstractArray, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT, Type} @assert iseven(size(qp, 1)) n = size(qp, 1)÷2 qp_split = assign_q_and_p(qp, n) @@ -249,8 +251,8 @@ function Chain(ghnn_arch::GeneralizedHamiltonianArchitecture) potential_energy = SymbolicPotentialEnergy(ghnn_arch.dim, ghnn_arch.width, ghnn_arch.nhidden, ghnn_arch.activation; parameters=ghnn_arch.parameters) for n in 1:ghnn_arch.n_integrators - c = (c..., SymplecticEulerA(kinetic_energy; last_step = false)) - c = n == ghnn_arch.n_integrators ? (c..., SymplecticEulerB(potential_energy; last_step=true)) : (c..., SymplecticEulerB(potential_energy; last_step=false)) + c = (c..., SymplecticEulerA(kinetic_energy; return_parameters = true)) + c = n == ghnn_arch.n_integrators ? (c..., SymplecticEulerB(potential_energy; return_parameters=false)) : (c..., SymplecticEulerB(potential_energy; return_parameters=true)) end Chain(c...) From a737f6b41828ba3d7a437d4d3bc7a444ac7d93c2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 13 Aug 2025 14:53:29 +0200 Subject: [PATCH 22/50] Changed output dimension to correct number. --- src/layers/forcing_dissipation_layers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index f9348e14..75b25a0e 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -40,7 +40,7 @@ function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_l Chain( Dense(dim÷2 + parameter_length, width, activation), inner_layers..., - Linear(width, 1; use_bias = false) + Linear(width, dim÷2; use_bias = false) ) end From 911158d7b61d9ac71faf1780aadba91cda0b5447 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 15 Aug 2025 11:05:59 +0200 Subject: [PATCH 23/50] Added test for single layer symbolic pullback for PGHNN. 
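The new test builds the smallest possible parametric GHNN: a single `SymplecticEulerB` layer wrapped around a `SymbolicPotentialEnergy`, for which the symbolic pullback is then assembled by hand. A minimal sketch of that setup (both names are internal, hence the explicit import and qualification; `m` and `ω` are the harmonic-oscillator parameters used in the test):

```julia
using GeometricMachineLearning
using GeometricMachineLearning: SymplecticEulerB

params, dim, width, nhidden, activation = (m = 1., ω = π / 2), 2, 2, 1, tanh

# potential energy V(q; m, ω), modelled by a small dense network
se = GeometricMachineLearning.SymbolicPotentialEnergy(dim, width, nhidden, activation; parameters = params)
# one symplectic-Euler-B substep: q is left unchanged, p is shifted by the gradient of V
l = SymplecticEulerB(se; return_parameters = false)
nn = NeuralNetwork(Chain(l))
```

The test then symbolizes `nn.params` and the system parameters, differentiates the parametric loss with respect to the network parameters, and evaluates the resulting pullback on random input/output data.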
--- ...hnn_symbolic_pullback_single_layer_test.jl | 66 +++++++++++++++++++ test/runtests.jl | 4 +- 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl diff --git a/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl b/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl new file mode 100644 index 00000000..2a87fc0e --- /dev/null +++ b/test/generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl @@ -0,0 +1,66 @@ +using GeometricMachineLearning +using GeometricMachineLearning: Activation, SymplecticEulerB, SymplecticPotentialEnergy, SymbolicNeuralNetwork, ParametricLoss, QPT + +params, dim, width, nhidden, activation = (m = 1., ω = π / 2), 2, 2, 1, tanh + +se = GeometricMachineLearning.SymbolicPotentialEnergy(dim, width, nhidden, activation; parameters = params) +l = SymplecticEulerB(se; return_parameters=false) +c = Chain(l) +nn = NeuralNetwork(c) + +cache = Dict() +symbolic_network_parameters = GeometricMachineLearning.SymbolicNeuralNetworks.symbolize!(cache, nn.params, :W) +cache2 = Dict() +symbolic_system_parameters = GeometricMachineLearning.SymbolicNeuralNetworks.symbolize!(cache2, params, :S) +loss = ParametricLoss() + +function GeometricMachineLearning.ParameterHandling.flatten(::Type{T}, ps::NeuralNetworkParameters) where {T<:Real} + _ps = NamedTuple{keys(ps)}(values(ps)) + x_vec, unflatten_to_NamedTuple = GeometricMachineLearning.ParameterHandling.flatten(T, _ps) + function unflatten_to_NeuralNetworkParameters(v::Vector{T}) + nt = unflatten_to_NamedTuple(v) + NeuralNetworkParameters{keys(nt)}(values(nt)) + end + x_vec, unflatten_to_NeuralNetworkParameters +end +function GeometricMachineLearning.ParameterHandling.flatten(T::Type{GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num}, v::GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Arr{GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, 1}) + GeometricMachineLearning.ParameterHandling.flatten(T, [elem for elem ∈ v]) +end + +flattened_params = GeometricMachineLearning.ParameterHandling.flatten(GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, symbolic_system_parameters) +GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.@variables sinput[1:(dim + length(params))] +GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.@variables soutput[1:GeometricMachineLearning.output_dimension(nn.model)] +# gen_fun = GeometricMachineLearning.SymbolicNeuralNetworks._build_nn_function(symbolic_pullbacks[1].L1.L1.W, snn.params, GeometricMachineLearning.concatenate_array_with_parameters(snn.input, symbolic_system_parameters), soutput) +symbolic_loss = loss(nn.model, symbolic_network_parameters, sinput[1:dim], soutput, flattened_params[2]([elem for elem ∈ sinput[(dim+1):end]])) +symbolic_diffs = GeometricMachineLearning.SymbolicNeuralNetworks.symbolic_differentials(symbolic_network_parameters) +symbolic_pullbacks = [GeometricMachineLearning.SymbolicNeuralNetworks.symbolic_derivative(f_single, symbolic_diffs) for f_single ∈ symbolic_loss] + +""" + semi_flatten_network_parameters(params) + +Should be used together with [`GeneralizedHamiltonianArchitecture`](@ref) and `SymbolicPullback`s. 
+""" +function semi_flatten_network_parameters(::Type{T}, params::NeuralNetworkParameters) where {T} + _values = Tuple(GeometricMachineLearning.ParameterHandling.flatten(T, value)[1] for value ∈ values(params)) + NeuralNetworkParameters{keys(params)}(_values) +end + +semi_flattened_network_parameters = semi_flatten_network_parameters(GeometricMachineLearning.SymbolicNeuralNetworks.Symbolics.Num, symbolic_network_parameters) +pbs_executable = GeometricMachineLearning.SymbolicNeuralNetworks.build_nn_function(symbolic_pullbacks, semi_flattened_network_parameters, sinput, soutput) +function pbs(input, output, params) + T = eltype(input) + pullback(::Union{Real, AbstractArray{<:Real}}) = GeometricMachineLearning._get_contents(GeometricMachineLearning._get_params(pbs_executable(input, output, semi_flatten_network_parameters(T, params)))) + pullback +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:AbstractArray, <:AbstractArray, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (concatenate_array_with_parameters(input, system_params), output)) +end +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPT, <:QPT, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (vcat(input.q, input.p), vcat(output.q, output.p), system_params)) +end + +_pullback = GeometricMachineLearning.SymbolicNeuralNetworks.SymbolicPullback(loss, pbs) +_pullback.fun(rand(4, 10), rand(2, 10), nn.params)(1.) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 27304847..73a993bb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -82,4 +82,6 @@ using Documenter: doctest @safetestset "Linear Symplectic Attention " begin include("linear_symplectic_attention.jl") end @safetestset "Linear Symplectic Transformer " begin include("linear_symplectic_transformer.jl") end -@safetestset "DataLoader for input and output " begin include("data_loader/data_loader_for_input_and_output.jl") end \ No newline at end of file +@safetestset "DataLoader for input and output " begin include("data_loader/data_loader_for_input_and_output.jl") end + +@safetestset "Test symbolic pullback for PGHNN with single layer " begin include("generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl") \ No newline at end of file From 45c2ed564e65ff5b74f17545a9eb0f7c8b6fc2ca Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 15 Aug 2025 11:06:44 +0200 Subject: [PATCH 24/50] Fixed symbolic pullback for pghnn (improvements still needed?). 
--- .../generalized_hamiltonian_neural_network.jl | 50 +++++++++-- src/optimizers/optimizer.jl | 4 + src/pullbacks/symbolic_hnn_pullback.jl | 86 +++++++++++++++++++ 3 files changed, 132 insertions(+), 8 deletions(-) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index 5d74c401..53f95efa 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -11,11 +11,12 @@ struct SymbolicEnergy{AT <: Activation, PT, Kinetic} parameter_convert::PT activation::AT - function SymbolicEnergy(dim, width, nhidden, activation::Activation; parameters::OptionalParameters=NullParameters(), type) + function SymbolicEnergy(dim, width, nhidden, activation; parameters::OptionalParameters=NullParameters(), type) @assert iseven(dim) "The input dimension must be an even integer!" flattened_parameters = ParameterHandling.flatten(parameters) parameter_length = length(flattened_parameters[1]) - new{typeof(activation), typeof(flattened_parameters[2]), type}(dim, width, nhidden, parameter_length, flattened_parameters[2], activation) + _activation = Activation(activation) + new{typeof(_activation), typeof(flattened_parameters[2]), type}(dim, width, nhidden, parameter_length, flattened_parameters[2], _activation) end end @@ -28,11 +29,11 @@ A `const` derived from [`SymbolicEnergy`](@ref). # Constructors -```jldoctest; setup=:(using GeometricMachineLearning) -julia> params, dim = (m = 1., ω = π / 2), 2 -((m = 1.0, ω = 1.5707963267948966), 2) +```jldoctest; setup=:(using GeometricMachineLearning; using GeometricMachineLearning: Activation) +julia> params, dim, width, nhidden, activation = (m = 1., ω = π / 2), 2, 2, 1, tanh +((m = 1.0, ω = 1.5707963267948966), 2, 2, 1, tanh) -julia> se = GeometricMachineLearning.SymbolicPotentialEnergy(dim; parameters = params); +julia> se = GeometricMachineLearning.SymbolicPotentialEnergy(dim, width, nhidden, activation; parameters = params); ``` @@ -147,13 +148,34 @@ function concatenate_array_with_parameters(qp::AbstractVector, params::OptionalP vcat(qp, ParameterHandling.flatten(params)[1]) end +function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) + @assert size(qp, 2) == 1 + vcat(qp, repeat(ParameterHandling.flatten(params)[1], 1, size(qp, 2))) +end + +function concatenate_array_with_parameters(qp::AbstractArray{T, 3}, params::AbstractVector) where {T} + @assert size(qp, 3) == length(params) + matrices = Tuple(concatenate_array_with_parameters(qp[:, :, i], params[i]) for i in axes(qp, 3)) + cat(matrices...; dims = 3) +end + +function concatenate_array_with_parameters(qp::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}, params::NamedTuple) + SymbolicNeuralNetworks.Symbolics.Arr(vcat(qp, values(params)...)) +end + +function concatenate_array_with_parameters(qp::AbstractArray{SymbolicNeuralNetworks.Symbolics.Num, 1}, params::NamedTuple) + concatenate_array_with_parameters(SymbolicNeuralNetworks.Symbolics.Arr(qp), params) +end + +Base.eltype(::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num}) = SymbolicNeuralNetworks.Symbolics.Num + # function concatenate_array_with_parameters(qp::AbstractMatrix, params::OptionalParameters) # hcat((concatenate_array_with_parameters(qp[:, i], params) for i in axes(qp, 2))...) 
# end function concatenate_array_with_parameters(qp::AbstractMatrix, params::AbstractVector) @assert _size(qp, 2) == length(params) - LazyArrays.Vcat((concatenate_array_with_parameters(@view(qp[:, i]), params[i]) for i in axes(params, 1))...) + vcat((concatenate_array_with_parameters(@view(qp[:, i]), params[i]) for i in axes(params, 1))...) end concatenate_array_with_parameters(a::AbstractArray{T, 3}, ::GeometricBase.NullParameters) where {T} = a @@ -273,4 +295,16 @@ function (c::Chain)(qp::QPT2{T, 3}, system_params::AbstractVector, ps::Union{Nam q_output = hcat([single_output_vectorwise.q for single_output_vectorwise ∈ output_vectorwise]...) p_output = hcat([single_output_vectorwise.p for single_output_vectorwise ∈ output_vectorwise]...) (q = reshape(q_output, size(q_output, 1), 1, size(q_output, 2)), p = reshape(p_output, size(p_output, 1), 1, size(p_output, 2))) -end \ No newline at end of file +end + +function (c::Chain)(qp::AbstractArray{T, 2}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} + @assert _size(qp, 2) == length(system_params) + qp_reshaped = reshape(qp, size(qp, 1), 1, length(system_params)) + @assert iseven(size(qp_reshaped, 1)) + n = size(qp_reshaped, 1)÷2 + qp_split = assign_q_and_p(qp_reshaped, n) + c_output = c(qp_split, system_params, ps)::QPT + reshape(vcat(c_output.q, c_output.p), 2n, length(system_params)) +end + +AbstractNeuralNetworks.networkbackend(::LazyArrays.ApplyArray) = AbstractNeuralNetworks.CPU() \ No newline at end of file diff --git a/src/optimizers/optimizer.jl b/src/optimizers/optimizer.jl index 9231d609..f81bbc68 100644 --- a/src/optimizers/optimizer.jl +++ b/src/optimizers/optimizer.jl @@ -173,6 +173,10 @@ function rgrad(ps::NeuralNetworkParameters, dp::NamedTuple) rgrad(NamedTuple{keys(ps)}(values(ps)), _get_params_without_warning(dp)) end +function rgrad(ps::NeuralNetworkParameters, dp::NeuralNetworkParameters) + rgrad(ps, NamedTuple{keys(dp)}(values(dp))) +end + function rgrad(Y::AbstractVecOrMat, dx::AbstractVecOrMat) @assert size(Y) == size(dx) dx diff --git a/src/pullbacks/symbolic_hnn_pullback.jl b/src/pullbacks/symbolic_hnn_pullback.jl index b0899e03..a514676c 100644 --- a/src/pullbacks/symbolic_hnn_pullback.jl +++ b/src/pullbacks/symbolic_hnn_pullback.jl @@ -11,4 +11,90 @@ function SymbolicPullback(arch::HamiltonianArchitecture) nn = SymbolicNeuralNetwork(arch) loss = HNNLoss(arch) SymbolicPullback(nn, loss) +end + +# TODO: This constitutes type piracy - should be in `SymbolicNeuralNetworks`! +# TODO: note that the the whole concatenation business may not be the ideal solution, but implementing a separate build_nn_function for any number of input arguments isn't either. Find something better here!! +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPTOAT, <:QPTOAT, <:Union{AbstractVector, NamedTuple}})::Tuple + input, output, system_params = input_and_parameters + _pullback.loss(model, ps, concatenate_array_with_parameters(input, system_params), output), _pullback.fun(input, output, system_params, ps) +end + +function assign_q_and_p(z::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}, N::Int) + @assert length(z) == 2N + (q = z[1:N], p = z[(N+1):2N]) +end + +function SymbolicNeuralNetworks.Symbolics.SymbolicUtils.Code.create_array(::Type{<:SymbolicNeuralNetworks.Symbolics.Arr}, ::Nothing, ::Val, ::Val{dims}, elems...) 
where {dims} + SymbolicNeuralNetworks.Symbolics.Arr([elems...]) +end + +function ParameterHandling.flatten(::Type{T}, ps::NeuralNetworkParameters) where {T<:Real} + _ps = NamedTuple{keys(ps)}(values(ps)) + x_vec, unflatten_to_NamedTuple = ParameterHandling.flatten(T, _ps) + function unflatten_to_NeuralNetworkParameters(v::Vector{T}) + nt = unflatten_to_NamedTuple(v) + NeuralNetworkParameters{keys(nt)}(values(nt)) + end + x_vec, unflatten_to_NeuralNetworkParameters +end +function ParameterHandling.flatten(T::Type{SymbolicNeuralNetworks.Symbolics.Num}, v::SymbolicNeuralNetworks.Symbolics.Arr{SymbolicNeuralNetworks.Symbolics.Num, 1}) + ParameterHandling.flatten(T, [elem for elem ∈ v]) +end + +""" + semi_flatten_network_parameters(params) + +Should be used together with [`GeneralizedHamiltonianArchitecture`](@ref) and `SymbolicPullback`s. +""" +function semi_flatten_network_parameters(::Type{T}, params::NeuralNetworkParameters) where {T} + _values = Tuple(ParameterHandling.flatten(T, value)[1] for value ∈ values(params)) + NeuralNetworkParameters{keys(params)}(_values) +end + +""" + SymbolicPullback(nn, loss, system_params) + +The `SymbolicPullback` for the case when we have a parametrized system (with `system_params`). +""" +function SymbolicPullback(nn::NeuralNetwork, loss::ParametricLoss, system_params::OptionalParameters) + cache = Dict() + symbolic_system_parameters = SymbolicNeuralNetworks.symbolize!(cache, system_params, :S) + cache = Dict() + symbolic_network_parameters = SymbolicNeuralNetworks.symbolize!(cache, nn.params, :W) + + input_dim = input_dimension(nn.model) + output_dim = output_dimension(nn.model) + flattened_params = ParameterHandling.flatten(SymbolicNeuralNetworks.Symbolics.Num, symbolic_system_parameters) + SymbolicNeuralNetworks.Symbolics.@variables sinput[1:(input_dim + length(system_params))] + SymbolicNeuralNetworks.Symbolics.@variables soutput[1:output_dim] + symbolic_loss = loss(nn.model, symbolic_network_parameters, sinput[1:input_dim], soutput, flattened_params[2]([elem for elem ∈ sinput[(input_dim+1):end]])) + symbolic_diffs = SymbolicNeuralNetworks.symbolic_differentials(symbolic_network_parameters) + symbolic_pullbacks = [SymbolicNeuralNetworks.symbolic_derivative(f_single, symbolic_diffs) for f_single ∈ symbolic_loss] + + semi_flattened_network_parameters = semi_flatten_network_parameters(SymbolicNeuralNetworks.Symbolics.Num, symbolic_network_parameters) + pbs_executable = SymbolicNeuralNetworks.build_nn_function(symbolic_pullbacks, semi_flattened_network_parameters, sinput, soutput) + function pbs(input, output, params) + T = (typeof(input) <: QPT) ? 
eltype(input.q) : eltype(input) + pullback(::Union{Real, AbstractArray{<:Real}}) = _get_contents(_get_params(pbs_executable(input, output, semi_flatten_network_parameters(T, params)))) + pullback + end + SymbolicPullback(loss, pbs) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:AbstractArray, <:AbstractArray, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (concatenate_array_with_parameters(input, system_params), output)) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{AT, AT, <:Union{NamedTuple, AbstractVector}})::Tuple where {T, AT <: AbstractArray{T, 3}} + input, output, system_params = input_and_parameters + _input = reshape(input, size(input, 1), size(input, 2) * size(input, 3)) + _output = reshape(output, size(output, 1), size(output, 2) * size(output, 3)) + _pullback.loss(model, ps, _input, _output, system_params), _pullback.fun(concatenate_array_with_parameters(_input, system_params), _output, ps) +end + +function (_pullback::SymbolicPullback)(ps, model, input_and_parameters::Tuple{<:QPT, <:QPT, <:Union{NamedTuple, AbstractVector}})::Tuple + input, output, system_params = input_and_parameters + _pullback(ps, model, (vcat(input.q, input.p), vcat(output.q, output.p), system_params)) end \ No newline at end of file From f454fd82b7e2e4654b73796511001568f8790838 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 15 Aug 2025 11:07:05 +0200 Subject: [PATCH 25/50] Using symbolic pullback now in the script. --- ...imeDependentHarmonicOscillator_Analytic.jl | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl index ca234037..2debbbea 100644 --- a/scripts/TimeDependentHarmonicOscillator_Analytic.jl +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -1,6 +1,6 @@ using HDF5 using GeometricMachineLearning -using GeometricMachineLearning: QPT, QPT2, Activation +using GeometricMachineLearning: QPT, QPT2, Activation, ParametricLoss, SymbolicNeuralNetwork, SymbolicPullback using CairoMakie using NNlib: relu @@ -109,20 +109,21 @@ end dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) # This sets up the neural network -width::Int = 2 +width::Int = 3 nhidden::Int = 1 n_integrators::Int = 1 -arch = GeneralizedHamiltonianArchitecture(2; activation = relu, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) +# sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) +arch = GeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) nn = NeuralNetwork(arch) # This is where training starts -function train_network(batch_size::Integer=10, method=AdamOptimizer(), n_epochs=10) - batch = Batch(batch_size) - o = Optimizer(method, nn) - o(nn, dl, batch, n_epochs) -end - -loss_array = train_network() +batch_size = 128 +n_epochs = 200 +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) +loss = ParametricLoss() +_pb = SymbolicPullback(nn, loss, turn_parameters_into_correct_format(t, IC)[1]); +loss_array = o(nn, dl, batch, n_epochs, loss, _pb) trajectory_number = 20 From 96dfe9f24d72ff427f923b1495dbbf5b471b1406 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: 
Sun, 17 Aug 2025 17:05:54 +0200 Subject: [PATCH 26/50] Changed constant determining training data. --- scripts/TimeDependentHarmonicOscillator_Analytic.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillator_Analytic.jl b/scripts/TimeDependentHarmonicOscillator_Analytic.jl index 2debbbea..dff531e6 100644 --- a/scripts/TimeDependentHarmonicOscillator_Analytic.jl +++ b/scripts/TimeDependentHarmonicOscillator_Analytic.jl @@ -9,8 +9,8 @@ omega = 1.0 # natural frequency of the harmonic Oscillator Omega = 3.5 # frequency of the external sinusoidal forcing F = .9 # amplitude of the external sinusoidal forcing ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) -T = 2π -nt = 500 # number of time steps +T = 2π * 5 +nt = 1000 # number of time steps dt = T/nt # time step # Generating the initial condition array From ee9c89b9c64e921c89b34db0494e195c0c7307de Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 18 Aug 2025 14:11:51 +0200 Subject: [PATCH 27/50] Implemented a forced version of PGHNNs. --- src/GeometricMachineLearning.jl | 3 +- ..._generalized_hamiltonian_neural_network.jl | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 src/architectures/forced_generalized_hamiltonian_neural_network.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index bb23be81..6a457afe 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -286,7 +286,7 @@ module GeometricMachineLearning export ClassificationTransformer, ClassificationLayer export VolumePreservingFeedForward export SymplecticAutoencoder, PSDArch - export HamiltonianArchitecture, StandardHamiltonianArchitecture, GeneralizedHamiltonianArchitecture + export HamiltonianArchitecture, StandardHamiltonianArchitecture, GeneralizedHamiltonianArchitecture, ForcedGeneralizedHamiltonianArchitecture export solve!, encoder, decoder @@ -315,6 +315,7 @@ module GeometricMachineLearning include("data_loader/parametric_data_loader.jl") include("architectures/forced_sympnet.jl") + include("architectures/forced_generalized_hamiltonian_neural_network.jl") export ForcedSympNet export default_optimizer diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl new file mode 100644 index 00000000..7372ba61 --- /dev/null +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -0,0 +1,31 @@ +""" + ForcedGeneralizedHamiltonianArchitecture <: HamiltonianArchitecture + +A version of [`GeneralizedHamiltonianArchitecture`](@ref) that includes forcing/dissipation terms. Also compare this to [`ForcedSympNet`](@ref). +""" +struct ForcedGeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} + dim::Int + width::Int + nhidden::Int + n_integrators::Int + parameters::PT + activation::AT + + function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters()) + activation = (typeof(activation) <: Activation) ? 
activation : Activation(activation) + new{typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) + end +end + +function Chain(arch::ForcedGeneralizedHamiltonianArchitecture) + layers = () + kinetic_energy = SymbolicKineticEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) + potential_energy = SymbolicPotentialEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) + for i ∈ 1:arch.n_integrators + layers = (layers..., SymplecticEulerA(kinetic_energy; return_parameters = true)) + layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = true)) + _return_parameters = !(i == arch.n_integrators) + layers = (layers..., ForcingLayerP(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=_return_parameters)) + end + Chain(layers...) +end \ No newline at end of file From 2c2f4ca6cf446a6e5b3a814d0b2ce6f638f2fc75 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 18 Aug 2025 14:12:57 +0200 Subject: [PATCH 28/50] Adjusted the previous script to use forced PGHNNs. --- ...rcedGeneralizedHamiltonianNeuralNetwork.jl | 151 ++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl diff --git a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl new file mode 100644 index 00000000..102b3089 --- /dev/null +++ b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl @@ -0,0 +1,151 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2, Activation, ParametricLoss, SymbolicNeuralNetwork, SymbolicPullback +using CairoMakie +using NNlib: relu + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = .0 # .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π * 5 +nt = 1000 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +""" +Turn a vector of numbers into a vector of `NamedTuple`s to be used by `ParametricDataLoader`. +""" +function turn_parameters_into_correct_format(t::AbstractVector, IC::AbstractVector{<:NamedTuple}) + vec_of_params = NamedTuple[] + for time_step ∈ t + time_step == t[end] || push!(vec_of_params, (t = time_step, )) + end + vcat((vec_of_params for _ in axes(IC, 1))...) 
+end + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] +p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +""" +This takes time as a single additional parameter (third axis). 
+""" +function load_time_dependent_harmonic_oscillator_with_parametric_data_loader(qp::QPT{T}, t::AbstractVector{T}, IC::AbstractVector) where {T} + qp_reformatted = turn_q_p_data_into_correct_format(qp) + t_reformatted = turn_parameters_into_correct_format(t, IC) + ParametricDataLoader(qp_reformatted, t_reformatted) +end + +# This sets up the data loader +dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) + +# This sets up the neural network +width::Int = 2 +nhidden::Int = 1 +n_integrators::Int = 1 +# sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) +arch = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) +nn = NeuralNetwork(arch) + +# This is where training starts +batch_size = 128 +n_epochs = 200 +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) +loss = ParametricLoss() +_pb = SymbolicPullback(nn, loss, turn_parameters_into_correct_format(t, IC)[1]); + +function train_network() + o(nn, dl, batch, n_epochs, loss, _pb) +end + +loss_array = train_network() + +trajectory_number = 20 + +# Testing the network +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +# note that we have to supply the parameters as a named tuple as well here: +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[trajectory_number,:]; label="analytic") \ No newline at end of file From db492d4076c64b25a0abfcb165077cf748ab0dfa Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Wed, 20 Aug 2025 18:17:30 +0200 Subject: [PATCH 29/50] Implemented parameterlength. --- src/architectures/generalized_hamiltonian_neural_network.jl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index 53f95efa..470393ff 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -125,6 +125,10 @@ struct SymplecticEuler{M, N, FT<:Base.Callable, MT<:Chain, type, ReturnParameter energy_model::MT end +function parameterlength(integrator::SymplecticEuler) + parameterlength(integrator.energy_model) +end + function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::SymplecticEuler, backend::KernelAbstractions.Backend, ::Type{T}) where {T} initialparameters(rng, init_weight, integrator.energy_model, backend, T) end From 2f950c81d44f158b1dfe89daa5992dc727c7b958 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 21 Aug 2025 12:37:46 +0200 Subject: [PATCH 30/50] Implemented parameterlength for the forcing layers. 
--- src/layers/forcing_dissipation_layers.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index 75b25a0e..e28e0d2a 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -14,6 +14,8 @@ struct ForcingLayer{M, N, PT<:Base.Callable, CT, type, ReturnParameters} <: Abst model::CT end +parameterlength(l::ForcingLayer) = parameterlength(l.model) + function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, integrator::ForcingLayer, backend::KernelAbstractions.Backend, ::Type{T}) where {T} initialparameters(rng, init_weight, integrator.model, backend, T) end From 9bc1f15ae50b8bd71fcc68ed81f3001dbaed6910 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 22 Aug 2025 15:09:08 +0200 Subject: [PATCH 31/50] Implemented a width parameter for the ResNet. --- src/GeometricMachineLearning.jl | 1 + src/architectures/resnet.jl | 11 ++++++----- src/layers/wide_resnet.jl | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 src/layers/wide_resnet.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 6a457afe..88da637f 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -156,6 +156,7 @@ module GeometricMachineLearning include("layers/forcing_dissipation_layers.jl") include("layers/bias_layer.jl") include("layers/resnet.jl") + include("layers/wide_resnet.jl") include("layers/manifold_layer.jl") include("layers/stiefel_layer.jl") include("layers/grassmann_layer.jl") diff --git a/src/architectures/resnet.jl b/src/architectures/resnet.jl index cc8e9851..90d86a68 100644 --- a/src/architectures/resnet.jl +++ b/src/architectures/resnet.jl @@ -23,20 +23,21 @@ where `dl` is an instance of `DataLoader`. See [`iterate`](@ref) for an example of this. """ struct ResNet{AT} <: NeuralNetworkIntegrator - sys_dim::Int - n_blocks::Int + sys_dim::Int + n_blocks::Int + width::Int activation::AT end -function ResNet(dl::DataLoader, n_blocks::Integer; activation = tanh) - ResNet(dl.input_dim, n_blocks, activation) +function ResNet(dl::DataLoader, n_blocks::Integer, width::Integer=dl.input_dim; activation = tanh) + ResNet(dl.input_dim, n_blocks, width, activation) end function Chain(arch::ResNet{AT}) where AT layers = () for _ in 1:arch.n_blocks # nonlinear layers - layers = (layers..., ResNetLayer(arch.sys_dim, arch.activation; use_bias=true)) + layers = (layers..., arch.sys_dim == arch.width ? 
ResNetLayer(arch.sys_dim, arch.activation; use_bias=true) : WideResNetLayer(arch.sys_dim, arch.width, arch.activation)) end # linear layers for the output diff --git a/src/layers/wide_resnet.jl b/src/layers/wide_resnet.jl new file mode 100644 index 00000000..62a60f98 --- /dev/null +++ b/src/layers/wide_resnet.jl @@ -0,0 +1,32 @@ +struct WideResNetLayer{M, N, F1} <: AbstractExplicitLayer{M, N} + width::Int + activation::F1 +end + +WideResNetLayer(dim::Integer, width::Integer, activation=identity) = WideResNetLayer{dim, dim, typeof(activation)}(width, activation) + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, l::WideResNetLayer{M, M}, backend::KernelAbstractions.Backend, ::Type{T}; init_bias = ZeroInitializer()) where {M, T} + upscale_weight = KernelAbstractions.allocate(backend, T, l.width, M) + upscale_bias = KernelAbstractions.allocate(backend, T, l.width) + downscale_weight = KernelAbstractions.allocate(backend, T, M, l.width) + bias = KernelAbstractions.allocate(backend, T, M) + init_weight(rng, upscale_weight) + init_weight(rng, downscale_weight) + init_bias(rng, upscale_bias) + init_bias(rng, bias) + (upscale_weight=upscale_weight, downscale_weight=downscale_weight, upscale_bias=upscale_bias, bias=bias) +end + +parameterlength(l::WideResNetLayer{M, M}) where {M} = l.width * (M + 1) + M * (l.width + 1) + +(d::WideResNetLayer{M, M})(x::AbstractVecOrMat, ps::NamedTuple) where {M} = x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * x + ps.upscale_bias) + ps.bias) + +(d::WideResNetLayer{M, M})(x::AbstractArray{T, 3}, ps::NamedTuple) where {M, T} = x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) + ps.upscale_bias)) + ps.bias) + +function (d::WideResNetLayer{M, M})(z::QPT, ps::NamedTuple) where {M} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), ps) + assign_q_and_p(output, N2) +end \ No newline at end of file From 0134c87ccd1487dc90cb4ac468d532d523a91254 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Fri, 22 Aug 2025 17:26:38 +0200 Subject: [PATCH 32/50] Fixed problem that emerged when using matrices or tensors as inputs (broadcasting for bias vector). --- src/architectures/resnet.jl | 2 +- src/layers/wide_resnet.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/architectures/resnet.jl b/src/architectures/resnet.jl index 90d86a68..b3524563 100644 --- a/src/architectures/resnet.jl +++ b/src/architectures/resnet.jl @@ -25,7 +25,7 @@ See [`iterate`](@ref) for an example of this. 
struct ResNet{AT} <: NeuralNetworkIntegrator sys_dim::Int n_blocks::Int - width::Int + width::Int activation::AT end diff --git a/src/layers/wide_resnet.jl b/src/layers/wide_resnet.jl index 62a60f98..f1a4c468 100644 --- a/src/layers/wide_resnet.jl +++ b/src/layers/wide_resnet.jl @@ -19,9 +19,9 @@ end parameterlength(l::WideResNetLayer{M, M}) where {M} = l.width * (M + 1) + M * (l.width + 1) -(d::WideResNetLayer{M, M})(x::AbstractVecOrMat, ps::NamedTuple) where {M} = x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * x + ps.upscale_bias) + ps.bias) +(d::WideResNetLayer{M, M})(x::AbstractVecOrMat, ps::NamedTuple) where {M} = x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * x .+ ps.upscale_bias) .+ ps.bias) -(d::WideResNetLayer{M, M})(x::AbstractArray{T, 3}, ps::NamedTuple) where {M, T} = x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) + ps.upscale_bias)) + ps.bias) +(d::WideResNetLayer{M, M})(x::AbstractArray{T, 3}, ps::NamedTuple) where {M, T} = x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias) function (d::WideResNetLayer{M, M})(z::QPT, ps::NamedTuple) where {M} @assert iseven(M) From 9d0f27dfbff65f6eaf196d6e1ae3539e44f39a59 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 21 Sep 2025 16:48:38 +0200 Subject: [PATCH 33/50] Added parametric resnet layers for comparison to PGHNNs. --- src/GeometricMachineLearning.jl | 2 + src/architectures/parametric_resnet.jl | 37 +++++++++++++++ src/layers/parametric_resnet_layer.jl | 65 ++++++++++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 src/architectures/parametric_resnet.jl create mode 100644 src/layers/parametric_resnet_layer.jl diff --git a/src/GeometricMachineLearning.jl b/src/GeometricMachineLearning.jl index 88da637f..bd144423 100644 --- a/src/GeometricMachineLearning.jl +++ b/src/GeometricMachineLearning.jl @@ -157,6 +157,7 @@ module GeometricMachineLearning include("layers/bias_layer.jl") include("layers/resnet.jl") include("layers/wide_resnet.jl") + include("layers/parametric_resnet_layer.jl") include("layers/manifold_layer.jl") include("layers/stiefel_layer.jl") include("layers/grassmann_layer.jl") @@ -261,6 +262,7 @@ module GeometricMachineLearning #INCLUDE ARCHITECTURES include("architectures/neural_network_integrator.jl") include("architectures/resnet.jl") + include("architectures/parametric_resnet.jl") include("architectures/transformer_integrator.jl") include("architectures/standard_transformer_integrator.jl") include("architectures/sympnet.jl") diff --git a/src/architectures/parametric_resnet.jl b/src/architectures/parametric_resnet.jl new file mode 100644 index 00000000..5652ef4c --- /dev/null +++ b/src/architectures/parametric_resnet.jl @@ -0,0 +1,37 @@ +struct ParametricResNet{AT <: Activation, PT <: OptionalParameters} <: NeuralNetworkIntegrator + sys_dim::Int + n_blocks::Int + width::Int + parameters::PT + activation::AT + + function ParametricResNet(dim; width=dim, n_blocks = HNN_nhidden_default, activation=HNN_activation_default, parameters=NullParameters()) + activation = (typeof(activation) <: Activation) ? 
activation : Activation(activation) + new{typeof(activation), typeof(parameters)}(dim, n_blocks, width, parameters, activation) + end +end + +function ParametricResNet(dl::DataLoader, n_blocks::Integer, width::Integer=dl.input_dim; activation=HNN_activation_default, parameters=NullParameters()) + ParametricResNet(dl.input_dim; width=width, n_blocks=n_blocks, activation) +end + +function ResNet(input_dim::Integer, n_blocks::Integer, width::Integer=input_dim; activation=HNN_activation_default, parameters=NullParameters()) + typeof(parameters) <: NullParameters ? ResNet(input_dim, n_blocks, width, activation) : ParametricResNet(input_dim; n_blocks=n_blocks, width=width, parameters=parameters, activation=activation) +end + +function ResNet(input_dim::Integer; n_blocks::Integer, width::Integer=input_dim, activation=HNN_activation_default, parameters=NullParameters()) + ResNet(input_dim, n_blocks, width; activation=activation, parameters=parameters) +end + +function Chain(arch::ParametricResNet{AT}) where AT + layers = () + for _ in 1:arch.n_blocks + # nonlinear layers + layers = (layers..., ParametricResNetLayer(arch.sys_dim, arch.width, arch.activation; parameters=arch.parameters, return_parameters=true)) + end + + # linear layers for the output + layers = (layers..., ParametricResNetLayer(arch.sys_dim, arch.width, identity; parameters=arch.parameters, return_parameters=false)) + + Chain(layers...) +end \ No newline at end of file diff --git a/src/layers/parametric_resnet_layer.jl b/src/layers/parametric_resnet_layer.jl new file mode 100644 index 00000000..6f97e1eb --- /dev/null +++ b/src/layers/parametric_resnet_layer.jl @@ -0,0 +1,65 @@ +struct ParametricResNetLayer{M, N, F1 <: Activation, PT, ReturnParameters} <: AbstractExplicitLayer{M, N} + width::Int + activation::F1 + parameter_length::Int + parameter_convert::PT +end + +function ParametricResNetLayer(dim::Integer, width::Integer, activation=identity; parameters::OptionalParameters=NullParameters(), return_parameters::Bool) + flattened_parameters = ParameterHandling.flatten(parameters) + parameter_length = length(flattened_parameters[1]) + _activation = Activation(activation) + ParametricResNetLayer{dim, dim, typeof(_activation), typeof(flattened_parameters[2]), return_parameters}(width, _activation, parameter_length, flattened_parameters[2]) +end + +function initialparameters(rng::Random.AbstractRNG, init_weight::AbstractNeuralNetworks.Initializer, l::ParametricResNetLayer{M, M}, backend::KernelAbstractions.Backend, ::Type{T}; init_bias = ZeroInitializer()) where {M, T} + upscale_weight = KernelAbstractions.allocate(backend, T, l.width, M + l.parameter_length) + upscale_bias = KernelAbstractions.allocate(backend, T, l.width) + downscale_weight = KernelAbstractions.allocate(backend, T, M, l.width) + bias = KernelAbstractions.allocate(backend, T, M) + init_weight(rng, upscale_weight) + init_weight(rng, downscale_weight) + init_bias(rng, upscale_bias) + init_bias(rng, bias) + (upscale_weight=upscale_weight, downscale_weight=downscale_weight, upscale_bias=upscale_bias, bias=bias) +end + +parameterlength(l::ParametricResNetLayer{M, M}) where {M} = (l.width + l.parameter_length) * (M + 1) + M * (l.width + 1) + +function (d::ParametricResNetLayer{M, M, F, PT, false})(x::AbstractVecOrMat, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + input = concatenate_array_with_parameters(x, problem_params) + x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * input .+ ps.upscale_bias) .+ ps.bias) +end + 
+function (d::ParametricResNetLayer{M, M, F, PT, true})(x::AbstractVecOrMat, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + input = concatenate_array_with_parameters(x, problem_params) + (x + d.activation.(ps.downscale_weight * d.activation.(ps.upscale_weight * input .+ ps.upscale_bias) .+ ps.bias), problem_params) +end + +# function (d::ParametricResNetLayer{M, M, F, PT, false})(x::AbstractArray{T, 3}, problem_params::AbstractVector, ps::NamedTuple) where {M, F, PT, T} +# input = concatenate_array_with_parameters(x, problem_params) +# x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias) +# end +# +# function (d::ParametricResNetLayer{M, M, F, PT, true})(x::AbstractArray{T, 3}, problem_params::AbstractVector, ps::NamedTuple) where {M, F, PT, T} +# input = concatenate_array_with_parameters(x, problem_params) +# (x + d.activation.(mat_tensor_mul(ps.downscale_weight, d.activation.(mat_tensor_mul(ps.upscale_weight, x) .+ ps.upscale_bias)) .+ ps.bias), problem_params) +# end + +(d::ParametricResNetLayer)(input::Tuple, ps::NamedTuple) = length(input) == 2 ? d(input..., ps) : error("The tuple must contain the input array/nt as well as the system parameters.") + +function (d::ParametricResNetLayer{M, M, F, PT, false})(z::QPT, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), problem_params, ps) + assign_q_and_p(output, N2) +end + +function (d::ParametricResNetLayer{M, M, F, PT, true})(z::QPT, problem_params::OptionalParameters, ps::NamedTuple) where {M, F, PT} + @assert iseven(M) + @assert size(z.q, 1) * 2 == M + N2 = M ÷ 2 + output = d(vcat(z.q, z.p), problem_params, ps) + (assign_q_and_p(output[1], N2), problem_params) +end \ No newline at end of file From fce7554a72e8ffdf96657d1340db525579e73641 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 21 Sep 2025 16:49:15 +0200 Subject: [PATCH 34/50] Added lines for special case when input is a tensor. --- src/architectures/generalized_hamiltonian_neural_network.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index 470393ff..b3f9d2cd 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -304,6 +304,12 @@ end function (c::Chain)(qp::AbstractArray{T, 2}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} @assert _size(qp, 2) == length(system_params) qp_reshaped = reshape(qp, size(qp, 1), 1, length(system_params)) + c(qp_reshaped, system_params, ps) +end + +function (c::Chain)(qp::AbstractArray{T, 3}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} + @assert size(qp, 3) == length(system_params) + @assert size(qp, 2) == 1 @assert iseven(size(qp_reshaped, 1)) n = size(qp_reshaped, 1)÷2 qp_split = assign_q_and_p(qp_reshaped, n) From 07bc2f666fd5ecf8a5a8b1e7a0a4666d8af35506 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 21 Sep 2025 17:08:28 +0200 Subject: [PATCH 35/50] Added script for parametric ResNet. 
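The script relies on the keyword `ResNet` constructor introduced together with the parametric ResNet: when a parameter `NamedTuple` is passed, construction is routed to `ParametricResNet`, whose layers also consume the flattened system parameters; with the default `NullParameters()` a plain `ResNet` is returned. A short sketch of that routing (the values and the types in the comment are illustrative only):

```julia
using GeometricMachineLearning

plain      = ResNet(2; n_blocks = 1, width = 4)                        # no system parameters
parametric = ResNet(2; n_blocks = 1, width = 4, parameters = (t = 0.0,))

typeof(plain), typeof(parametric)  # expected: a ResNet and a ParametricResNet, respectively
```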
--- ...ndentHarmonicOscillatorParametricResnet.jl | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 scripts/TimeDependentHarmonicOscillatorParametricResnet.jl diff --git a/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl b/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl new file mode 100644 index 00000000..5b6e0f31 --- /dev/null +++ b/scripts/TimeDependentHarmonicOscillatorParametricResnet.jl @@ -0,0 +1,150 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2, Activation, ParametricLoss, SymbolicNeuralNetwork, SymbolicPullback +using CairoMakie +using NNlib: relu + +# PARAMETERS +omega = 1.0 # natural frequency of the harmonic Oscillator +Omega = 3.5 # frequency of the external sinusoidal forcing +F = .0 # .9 # amplitude of the external sinusoidal forcing +ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) +T = 2π * 5 +nt = 1000 # number of time steps +dt = T/nt # time step + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + +# Generating the solution array +ni = ni_dim^2 +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt * range(0, nt, step=1)) + +""" +Turn a vector of numbers into a vector of `NamedTuple`s to be used by `ParametricDataLoader`. +""" +function turn_parameters_into_correct_format(t::AbstractVector, IC::AbstractVector{<:NamedTuple}) + vec_of_params = NamedTuple[] + for time_step ∈ t + time_step == t[end] || push!(vec_of_params, (t = time_step, )) + end + vcat((vec_of_params for _ in axes(IC, 1))...) +end + +for i in 1:nt+1 + for j=1:ni + q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *sin(omega*t[i]) + IC[j].q*cos(omega*t[i]) + F/(omega^2-Omega^2)*sin(Omega*t[i]) + p[j,i] = -omega^2*IC[j].q*sin(omega*t[i]) + ( IC[j].p - Omega*F/(omega^2-Omega^2) )*cos(omega*t[i]) + Omega*F/(omega^2-Omega^2)*cos(Omega*t[i]) + # q[j,i] = ( IC[j].p - Omega*F/(omega^2-Omega^2) )/ omega *exp(-omega*t[i]) - IC[j].q*exp(-omega*t[i]) + F/(omega^2-Omega^2)*exp(-Omega*t[i]) + # p[j,i] = -omega^2*IC[j].q*exp(-omega*t[i]) + ( IC[j].p + Omega*F/(omega^2-Omega^2) )*exp(-omega*t[i]) - Omega*F/(omega^2-Omega^2)*exp(-Omega*t[i]) + end + +end + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] 
+p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + +# SAVING TO FILE + +# h5 = h5open(path, "w") +# write(h5, "q", q) +# write(h5, "p", p) +# write(h5, "t", t) +# +# attrs(h5)["ni"] = ni +# attrs(h5)["nt"] = nt +# attrs(h5)["dt"] = dt +# +# close(h5) + +""" +This takes time as a single additional parameter (third axis). +""" +function load_time_dependent_harmonic_oscillator_with_parametric_data_loader(qp::QPT{T}, t::AbstractVector{T}, IC::AbstractVector) where {T} + qp_reformatted = turn_q_p_data_into_correct_format(qp) + t_reformatted = turn_parameters_into_correct_format(t, IC) + ParametricDataLoader(qp_reformatted, t_reformatted) +end + +# This sets up the data loader +dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) + +# This sets up the neural network +width::Int = 2 +n_blocks::Int = 1 +n_integrators::Int = 1 +# sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) +arch = ResNet(2, n_blocks=n_blocks, width=width; activation=tanh, parameters=turn_parameters_into_correct_format(t, IC)[1]) +nn = NeuralNetwork(arch) + +# This is where training starts +batch_size = 128 +n_epochs = 200 +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) +loss = ParametricLoss() + +function train_network() + o(nn, dl, batch, n_epochs, loss) +end + +loss_array = train_network() + +trajectory_number = 20 + +# Testing the network +initial_conditions = (q = q[trajectory_number, 1], p = p[trajectory_number, 1]) +n_steps = nt +trajectory = (q = zeros(1, n_steps), p = zeros(1, n_steps)) +trajectory.q[:, 1] .= initial_conditions.q +trajectory.p[:, 1] .= initial_conditions.p +# note that we have to supply the parameters as a named tuple as well here: +for t_step ∈ 0:(n_steps-2) + qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn.params) + trajectory.q[:, t_step+2] .= qp_temporary.q + trajectory.p[:, t_step+2] .= qp_temporary.p +end + +fig = Figure() +ax = Axis(fig[1,1]) +lines!(ax, trajectory.q[1,:]; label="nn") +lines!(ax, q[trajectory_number,:]; label="analytic") \ No newline at end of file From b1468991c1121ffab17f5c117614314eabab0aa8 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 21 Sep 2025 17:08:49 +0200 Subject: [PATCH 36/50] Fixed a ResNet test with an additional constructor. 
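The width parameter introduced a few commits ago changed the positional `ResNet` constructor, which broke an existing test; the method added below restores the old three-argument form by defaulting the width to the system dimension. A sketch of the equivalence (illustrative values, not the actual test call):

```julia
using GeometricMachineLearning

arch_old = ResNet(4, 2, tanh)     # sys_dim, n_blocks, activation
arch_new = ResNet(4, 2, 4, tanh)  # sys_dim, n_blocks, width, activation

arch_old.width == arch_new.width  # expected: true, both use width = sys_dim = 4
```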
--- src/architectures/resnet.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/architectures/resnet.jl b/src/architectures/resnet.jl index b3524563..bc8829b6 100644 --- a/src/architectures/resnet.jl +++ b/src/architectures/resnet.jl @@ -29,6 +29,8 @@ struct ResNet{AT} <: NeuralNetworkIntegrator activation::AT end +ResNet(sys_dim::Integer, n_blocks::Integer, activation) = ResNet(sys_dim, n_blocks, sys_dim, activation) + function ResNet(dl::DataLoader, n_blocks::Integer, width::Integer=dl.input_dim; activation = tanh) ResNet(dl.input_dim, n_blocks, width, activation) end From 5cc6a36097076c075453a0ecbf52b6852e639901 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sun, 21 Sep 2025 17:20:59 +0200 Subject: [PATCH 37/50] Added end. --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 73a993bb..8cd93ffb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -84,4 +84,4 @@ using Documenter: doctest @safetestset "DataLoader for input and output " begin include("data_loader/data_loader_for_input_and_output.jl") end -@safetestset "Test symbolic pullback for PGHNN with single layer " begin include("generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl") \ No newline at end of file +@safetestset "Test symbolic pullback for PGHNN with single layer " begin include("generalized_hamiltonian_neural_networks/pghnn_symbolic_pullback_single_layer_test.jl") end \ No newline at end of file From b7aff8bc5c72c073e88699601f6a2807d3bceb0c Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sat, 4 Oct 2025 12:27:18 +0200 Subject: [PATCH 38/50] Adjusted script so that forcing can be specified. --- ...rcedGeneralizedHamiltonianNeuralNetwork.jl | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl index 102b3089..df2c42f2 100644 --- a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl +++ b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl @@ -7,9 +7,9 @@ using NNlib: relu # PARAMETERS omega = 1.0 # natural frequency of the harmonic Oscillator Omega = 3.5 # frequency of the external sinusoidal forcing -F = .0 # .9 # amplitude of the external sinusoidal forcing +F = .9 # amplitude of the external sinusoidal forcing ni_dim = 10 # number of initial conditions per dimension (so ni_dim^2 total) -T = 2π * 5 +T = 2π * 20 nt = 1000 # number of time steps dt = T/nt # time step @@ -109,23 +109,28 @@ end dl = load_time_dependent_harmonic_oscillator_with_parametric_data_loader((q = q, p = p), t, IC) # This sets up the neural network -width::Int = 2 +width::Int = 1 nhidden::Int = 1 -n_integrators::Int = 1 +n_integrators::Int = 2 # sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) -arch = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1]) -nn = NeuralNetwork(arch) +arch1 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :P) +arch2 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = 
n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :Q) +nn1 = NeuralNetwork(arch1) +nn2 = NeuralNetwork(arch2) # This is where training starts batch_size = 128 n_epochs = 200 batch = Batch(batch_size) -o = Optimizer(AdamOptimizer(), nn) +o1 = Optimizer(AdamOptimizer(), nn1) +o2 = Optimizer(AdamOptimizer(), nn2) loss = ParametricLoss() -_pb = SymbolicPullback(nn, loss, turn_parameters_into_correct_format(t, IC)[1]); +_pb = SymbolicPullback(nn1, loss, turn_parameters_into_correct_format(t, IC)[1]); +_pb = SymbolicPullback(nn2, loss, turn_parameters_into_correct_format(t, IC)[1]); function train_network() - o(nn, dl, batch, n_epochs, loss, _pb) + o1(nn1, dl, batch, n_epochs, loss, _pb) + o2(nn2, dl, batch, n_epochs, loss, _pb) end loss_array = train_network() @@ -140,7 +145,7 @@ trajectory.q[:, 1] .= initial_conditions.q trajectory.p[:, 1] .= initial_conditions.p # note that we have to supply the parameters as a named tuple as well here: for t_step ∈ 0:(n_steps-2) - qp_temporary = nn.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn.params) + qp_temporary = nn1.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn1.params) trajectory.q[:, t_step+2] .= qp_temporary.q trajectory.p[:, t_step+2] .= qp_temporary.p end From 268c1efcea8375cfb520f75dcca99013deb255ca Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sat, 4 Oct 2025 12:27:56 +0200 Subject: [PATCH 39/50] Introduced forcing type that can be specified. --- ...ced_generalized_hamiltonian_neural_network.jl | 11 ++++++----- src/architectures/forced_sympnet.jl | 16 +++++++++------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl index 7372ba61..3a1d19a6 100644 --- a/src/architectures/forced_generalized_hamiltonian_neural_network.jl +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -3,7 +3,7 @@ A version of [`GeneralizedHamiltonianArchitecture`](@ref) that includes forcing/dissipation terms. Also compare this to [`ForcedSympNet`](@ref). """ -struct ForcedGeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} +struct ForcedGeneralizedHamiltonianArchitecture{FT, AT, PT <: OptionalParameters} <: HamiltonianArchitecture{AT} dim::Int width::Int nhidden::Int @@ -11,13 +11,14 @@ struct ForcedGeneralizedHamiltonianArchitecture{AT, PT <: OptionalParameters} <: parameters::PT activation::AT - function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters()) + function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters(), forcing_type::Symbol=:P) + forcing_type == :P || forcing_type == :Q || error("Forcing has to be either :Q or :P. It is $(forcing_type).") activation = (typeof(activation) <: Activation) ? 
activation : Activation(activation) - new{typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) + new{forcing_type, typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) end end -function Chain(arch::ForcedGeneralizedHamiltonianArchitecture) +function Chain(arch::ForcedGeneralizedHamiltonianArchitecture{FT}) where {FT} layers = () kinetic_energy = SymbolicKineticEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) potential_energy = SymbolicPotentialEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) @@ -25,7 +26,7 @@ function Chain(arch::ForcedGeneralizedHamiltonianArchitecture) layers = (layers..., SymplecticEulerA(kinetic_energy; return_parameters = true)) layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = true)) _return_parameters = !(i == arch.n_integrators) - layers = (layers..., ForcingLayerP(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=_return_parameters)) + layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=_return_parameters, type=FT)) end Chain(layers...) end \ No newline at end of file diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index a80bcd9e..a2961ebb 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -22,7 +22,7 @@ Keyword arguments are: - `activation""" * "$(g_activation_default)`" * raw""": The activation function that is applied. - `init_upper::Bool""" * "$(g_init_upper_default)`" * raw""": Initialize the gradient layer so that it first modifies the $q$-component. """ -struct ForcedSympNet{AT} <: NeuralNetworkIntegrator +struct ForcedSympNet{FT, AT} <: NeuralNetworkIntegrator dim::Int upscaling_dimension::Int n_layers::Int @@ -33,20 +33,22 @@ struct ForcedSympNet{AT} <: NeuralNetworkIntegrator upscaling_dimension = 2 * dim, n_layers = g_n_layers_default, activation = g_activation_default, - init_upper = g_init_upper_default) - new{typeof(activation)}(dim, upscaling_dimension, n_layers, activation, init_upper) + init_upper = g_init_upper_default, + forcing_type::Symbol = :P) + new{forcing_type, typeof(activation)}(dim, upscaling_dimension, n_layers, activation, init_upper) end function ForcedSympNet(dl::DataLoader; upscaling_dimension = 2 * dl.input_dim, n_layers = g_n_layers_default, activation = g_activation_default, - init_upper = g_init_upper_default) - new{typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, activation, init_upper) + init_upper = g_init_upper_default, + forcing_type::Symbol = :P) + new{forcing_type, typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, activation, init_upper) end end -function Chain(arch::ForcedSympNet) +function Chain(arch::ForcedSympNet{FT}) where {FT} layers = () is_upper_criterion = arch.init_upper ? isodd : iseven for i in 1:arch.n_layers @@ -57,7 +59,7 @@ function Chain(arch::ForcedSympNet) GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act) end ) - layers = (layers..., ForcingLayerP(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false)) + layers = (layers..., ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false, type=FT)) end Chain(layers...) 
end \ No newline at end of file From 17228ed81e3a5cb428de90b10ad7355681ba4c51 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Sat, 4 Oct 2025 12:28:14 +0200 Subject: [PATCH 40/50] Fixed typo. --- src/architectures/generalized_hamiltonian_neural_network.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index b3f9d2cd..d320d3a4 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -310,9 +310,9 @@ end function (c::Chain)(qp::AbstractArray{T, 3}, system_params::AbstractVector, ps::Union{NamedTuple, NeuralNetworkParameters}) where {T} @assert size(qp, 3) == length(system_params) @assert size(qp, 2) == 1 - @assert iseven(size(qp_reshaped, 1)) - n = size(qp_reshaped, 1)÷2 - qp_split = assign_q_and_p(qp_reshaped, n) + @assert iseven(size(qp, 1)) + n = size(qp, 1)÷2 + qp_split = assign_q_and_p(qp, n) c_output = c(qp_split, system_params, ps)::QPT reshape(vcat(c_output.q, c_output.p), 2n, length(system_params)) end From 353353ce2094f84dc4273063573dc96b76f1d9cf Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 7 Oct 2025 12:05:02 +0200 Subject: [PATCH 41/50] Added Marsden West-reference. --- docs/src/GeometricMachineLearning.bib | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/src/GeometricMachineLearning.bib b/docs/src/GeometricMachineLearning.bib index 6b971f3e..dc9e18fa 100644 --- a/docs/src/GeometricMachineLearning.bib +++ b/docs/src/GeometricMachineLearning.bib @@ -928,6 +928,16 @@ @article{ge1988lie publisher={Elsevier} } +@article{marsden2001discrete, + title={Discrete mechanics and variational integrators}, + author={Marsden, Jerrold E and West, Matthew}, + journal={Acta numerica}, + volume={10}, + pages={357--514}, + year={2001}, + publisher={Cambridge University Press} +} + @article{otto2023learning, title={Learning nonlinear projections for reduced-order modeling of dynamical systems using constrained autoencoders}, author={Otto, Samuel E and Macchio, Gregory R and Rowley, Clarence W}, From 5c654707d9b2f18a6c25ef7b3a80b2072d5c0d01 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 7 Oct 2025 12:05:42 +0200 Subject: [PATCH 42/50] Removed comment that is outdated now. --- src/architectures/forced_sympnet.jl | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index a2961ebb..fb95561b 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -3,9 +3,6 @@ `ForcedSympNet`s are based on [`SympNet`](@ref)s [jin2020sympnets](@cite) and include [`ForcingLayer`](@ref)s. They are based on [`GSympNet`](@ref)s. -!!! warn - For the moment only ``p`` forcing is implemented (i.e. we only use [`ForcingLayerP`](@ref)). - # Constructor ```julia From 8873c193787db3ac260675c893d6560fb4a17f78 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 7 Oct 2025 12:06:38 +0200 Subject: [PATCH 43/50] Fixed forcing for Q terms. --- src/layers/forcing_dissipation_layers.jl | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index e28e0d2a..973683d1 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -4,6 +4,11 @@ Layers that can learn dissipative or forcing terms, but not conservative ones. 
Use the constructors [`ForcingLayerQ`](@ref) and [`ForcingLayerP`](@ref) for this. + +!!! warn + The forcing is dependent on either ``q`` or ``p``, but always applied to the ``p`` component. + +The forcing layers are inspired by the Lagrange-d'Alembert integrator from [marsden2001discrete; Example 3.2.2](@cite). """ struct ForcingLayer{M, N, PT<:Base.Callable, CT, type, ReturnParameters} <: AbstractExplicitLayer{M, N} dim::Int @@ -83,7 +88,7 @@ end function (integrator::ForcingLayerQ{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) - (q = qp.q + integrator.model(input, params), p = qp.p) + (q = qp.q, p = qp.p + integrator.model(input, params)) end function (integrator::ForcingLayerP{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} @@ -93,7 +98,7 @@ end function (integrator::ForcingLayerQ{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) - ((q = qp.q + integrator.model(input, params), p = qp.p), problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) end function (integrator::ForcingLayerP{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} From 20805b4e1004e203ff05b32dec2274cc01fa3eb3 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 7 Oct 2025 15:06:16 +0200 Subject: [PATCH 44/50] Adjusted the SympNet/GHNN integrators to the Lagrange-d'Alembert integrator from Marsden-West. --- .../forced_generalized_hamiltonian_neural_network.jl | 4 ++-- src/architectures/forced_sympnet.jl | 2 +- .../generalized_hamiltonian_neural_network.jl | 6 ++++++ src/layers/forcing_dissipation_layers.jl | 10 +++++++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl index 3a1d19a6..e409ffa4 100644 --- a/src/architectures/forced_generalized_hamiltonian_neural_network.jl +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -24,9 +24,9 @@ function Chain(arch::ForcedGeneralizedHamiltonianArchitecture{FT}) where {FT} potential_energy = SymbolicPotentialEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) for i ∈ 1:arch.n_integrators layers = (layers..., SymplecticEulerA(kinetic_energy; return_parameters = true)) - layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = true)) + layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=true, type=FT)) _return_parameters = !(i == arch.n_integrators) - layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=_return_parameters, type=FT)) + layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = _return_parameters)) end Chain(layers...) 
end \ No newline at end of file diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index fb95561b..83128b8f 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -53,10 +53,10 @@ function Chain(arch::ForcedSympNet{FT}) where {FT} if is_upper_criterion(i) GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act) else + ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false, type=FT), GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act) end ) - layers = (layers..., ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false, type=FT)) end Chain(layers...) end \ No newline at end of file diff --git a/src/architectures/generalized_hamiltonian_neural_network.jl b/src/architectures/generalized_hamiltonian_neural_network.jl index d320d3a4..9e020d02 100644 --- a/src/architectures/generalized_hamiltonian_neural_network.jl +++ b/src/architectures/generalized_hamiltonian_neural_network.jl @@ -136,12 +136,18 @@ end const SymplecticEulerA{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :A, ReturnParameters} const SymplecticEulerB{M, N, FT, AT, ReturnParameters} = SymplecticEuler{M, N, FT, AT, :B, ReturnParameters} +""" +Changes ``q`` (based on the kinetic energy). +""" function SymplecticEulerA(se::SymbolicKineticEnergy; return_parameters::Bool) gradient_function = build_gradient(se) c = Chain(se) SymplecticEuler{se.dim, se.dim, typeof(gradient_function), typeof(c), :A, return_parameters}(gradient_function, c) end +""" +Changes ``p`` (based on the potential energy). +""" function SymplecticEulerB(se::SymbolicPotentialEnergy; return_parameters::Bool) gradient_function = build_gradient(se) c = Chain(se) diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index 973683d1..6ea03f85 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -8,7 +8,15 @@ Use the constructors [`ForcingLayerQ`](@ref) and [`ForcingLayerP`](@ref) for thi !!! warn The forcing is dependent on either ``q`` or ``p``, but always applied to the ``p`` component. -The forcing layers are inspired by the Lagrange-d'Alembert integrator from [marsden2001discrete; Example 3.2.2](@cite). +The forcing layers are inspired by the Lagrange-d'Alembert integrator from [marsden2001discrete; Example 3.2.2](@cite): + +```math +\begin{aligned} + q^{(t+1)} = & q^{(t)} + & hM^{-1}p^{(t)}, \\ + p^{(t+1)} = & p^{(t)} + & -h\nabla{}U(q^{(t)}) + hf_H(q^{(t+1)}), +\end{aligned} +``` +for a separable Hamiltonian ``H(q, p) = T(p) + U(q) = p^TM^{-1}p + U(q)`` and external forcing ``f_H.`` """ struct ForcingLayer{M, N, PT<:Base.Callable, CT, type, ReturnParameters} <: AbstractExplicitLayer{M, N} dim::Int From 01d64c6144bdc1a944c89afca36d04a77efd99b7 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 7 Oct 2025 15:18:16 +0200 Subject: [PATCH 45/50] Fixed equation. 
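The potential gradient and the forcing term in the `ForcingLayer` docstring are now evaluated at the updated position ``q^{(t+1)}`` (with the forcing additionally depending on ``p^{(t)}``), consistent with the `SymplecticEulerA` -> `ForcingLayer` -> `SymplecticEulerB` ordering introduced in the previous commit. For reference, the corrected Lagrange-d'Alembert-type step reads:

```math
\begin{aligned}
q^{(t+1)} & = q^{(t)} + hM^{-1}p^{(t)}, \\
p^{(t+1)} & = p^{(t)} - h\nabla{}U(q^{(t+1)}) + hf_H(q^{(t+1)}, p^{(t)}).
\end{aligned}
```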
--- src/layers/forcing_dissipation_layers.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index 6ea03f85..7755f915 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -13,7 +13,7 @@ The forcing layers are inspired by the Lagrange-d'Alembert integrator from [mars ```math \begin{aligned} q^{(t+1)} = & q^{(t)} + & hM^{-1}p^{(t)}, \\ - p^{(t+1)} = & p^{(t)} + & -h\nabla{}U(q^{(t)}) + hf_H(q^{(t+1)}), + p^{(t+1)} = & p^{(t)} + & -h\nabla{}U(q^{(t+1)}) + hf_H(q^{(t+1)}, p^{(t)}), \end{aligned} ``` for a separable Hamiltonian ``H(q, p) = T(p) + U(q) = p^TM^{-1}p + U(q)`` and external forcing ``f_H.`` From d5924eb753fe3df318150c49200a333db24c8fc2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 20 Oct 2025 23:52:32 +0200 Subject: [PATCH 46/50] Fixed ForcedSympNets. Problem was due to not specifying NullParameters as default when network was agnostic to Q/P. --- src/architectures/forced_sympnet.jl | 8 ++++---- src/layers/forcing_dissipation_layers.jl | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index 83128b8f..26a17cb0 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -49,14 +49,14 @@ function Chain(arch::ForcedSympNet{FT}) where {FT} layers = () is_upper_criterion = arch.init_upper ? isodd : iseven for i in 1:arch.n_layers - layers = (layers..., + layers = if is_upper_criterion(i) - GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act) + (layers..., GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act)) else + (layers..., ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false, type=FT), - GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act) + GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act)) end - ) end Chain(layers...) end \ No newline at end of file diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index 7755f915..34afd564 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -59,7 +59,7 @@ function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_l ) end -function ForcingLayer(dim::Integer, width::Integer, nhidden::Integer, activation; parameters::OptionalParameters, return_parameters::Bool, type::Symbol) +function ForcingLayer(dim::Integer, width::Integer, nhidden::Integer, activation; parameters::OptionalParameters=NullParameters(), return_parameters::Bool, type::Symbol) flattened_parameters = ParameterHandling.flatten(parameters) parameter_length = length(flattened_parameters[1]) c = build_chain(dim, width, nhidden, parameter_length, activation) From 81a991a27a72fc921ca16c754b0a82d973d140b0 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Tue, 21 Oct 2025 00:55:09 +0200 Subject: [PATCH 47/50] Added qp-forcing (i.e. forcing that depends on both q and p). 
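A minimal usage sketch (the dimensions and keyword values below are illustrative): both forced architectures now accept `forcing_type = :QP`, in which case the forcing network takes the full ``(q, p)`` input but, as before, only modifies the ``p`` component; `:P` remains the default.

```julia
using GeometricMachineLearning

# forcing depends on both q and p
arch_sympnet = ForcedSympNet(2; upscaling_dimension = 4, forcing_type = :QP)
arch_ghnn = ForcedGeneralizedHamiltonianArchitecture(2; width = 2, forcing_type = :QP)

nn = NeuralNetwork(arch_sympnet)
```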
--- scripts/Train_DampedOscillator_QP.jl | 114 ++++++++++++++++++ ..._generalized_hamiltonian_neural_network.jl | 2 +- src/architectures/forced_sympnet.jl | 2 +- src/layers/forcing_dissipation_layers.jl | 31 ++++- 4 files changed, 142 insertions(+), 7 deletions(-) create mode 100644 scripts/Train_DampedOscillator_QP.jl diff --git a/scripts/Train_DampedOscillator_QP.jl b/scripts/Train_DampedOscillator_QP.jl new file mode 100644 index 00000000..2ccf6b7a --- /dev/null +++ b/scripts/Train_DampedOscillator_QP.jl @@ -0,0 +1,114 @@ +using HDF5 +using GeometricMachineLearning +using GeometricMachineLearning: QPT, QPT2 +using CairoMakie +using JLD2 +using NNlib: relu + +# PARAMETERS +nu = 0.001 # friction force coefficient +ni_dim = 2 # number of initial conditions per dimension (so ni_dim^2 total) +T = 13 +nt = 100 # number of time steps +dt = T/nt # time step +n_epochs = 100000 +n_epochs = 3 +width = 4 # width of the neural network +nhidden = 3 # number of hidden layers in the neural network +batch_size = 5000 # the size of the batch + +path_out = "D:\\RESEARCH - UTWENTE\\GFHNNs\\Damped Oscillator\\network_TEST.jld2" +#path_out = "/home/tyranowskitm/GFHNNs/DampedOscillator/OUTPUTS/network.jld2" + + +# Generating the initial condition array +IC = vec( [(q=q0, p=p0) for q0 in range(-1, 1, ni_dim), p0 in range(-1, 1, ni_dim)] ) + + +# Generating the solution array +ni = ni_dim^2 +omega = sqrt(4-nu^2) / 2 + +q = zeros(Float64, ni, nt+1) +p = zeros(Float64, ni, nt+1) +t = collect(dt*range(0,nt,step=1)) + +for i in 1:nt+1 + + for j=1:ni + q[j,i] = (1/omega)*( IC[j].p + nu/2 *IC[j].q )*exp(-nu*t[i]/2)*sin(omega*t[i]) + IC[j].q*exp(-nu*t[i]/2)*cos(omega*t[i]) + p[j,i] = -(1/omega)*( IC[j].q + nu/2 *IC[j].p )*exp(-nu*t[i]/2)*sin(omega*t[i]) + IC[j].p*exp(-nu*t[i]/2)*cos(omega*t[i]) + end + +end + + + +@doc raw""" +Turn a `NamedTuple` of ``(q,p)`` data into two tensors of the correct format. + +This is the tricky part as the structure of the input array(s) needs to conform with the structure of the parameters. + +Here the data are rearranged in an array of size ``(n, 2, t_f - 1)`` where ``[t_0, t_1, \ldots, t_f]`` is the vector storing the time steps. + +If we deal with different initial conditions as well, we still put everything into the third (parameter) axis. + +# Example + +```jldoctest +using GeometricMachineLearning + +q = [1. 2. 3.; 4. 5. 6.] 
+p = [1.5 2.5 3.5; 4.5 5.5 6.5] +qp = (q = q, p = p) +turn_q_p_data_into_correct_format(qp) + +# output + +(q = [1.0 2.0; 4.0 5.0;;; 2.0 3.0; 5.0 6.0], p = [1.5 2.5; 4.5 5.5;;; 2.5 3.5; 5.5 6.5]) +``` +""" +function turn_q_p_data_into_correct_format(qp::QPT2{T, 2}) where {T} + number_of_time_steps = size(qp.q, 2) - 1 # not counting t₀ + number_of_initial_conditions = size(qp.q, 1) + q_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + p_array = zeros(T, 1, 2, number_of_time_steps * number_of_initial_conditions) + for initial_condition_index ∈ 0:(number_of_initial_conditions - 1) + for time_index ∈ 1:number_of_time_steps + q_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index] + q_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.q[initial_condition_index + 1, time_index + 1] + p_array[:, 1, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index] + p_array[:, 2, initial_condition_index * number_of_time_steps + time_index] .= qp.p[initial_condition_index + 1, time_index + 1] + end + end + (q = q_array, p = p_array) +end + + +# This sets up the data loader +dl = DataLoader(turn_q_p_data_into_correct_format((q = q, p = p))) + +# This sets up the neural network +arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, forcing_type = :QP) +#arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, activation=(x-> max(0,x)^2/2)) +nn = NeuralNetwork(arch) + +# This is where training starts +batch = Batch(batch_size) +o = Optimizer(AdamOptimizer(), nn) + +loss_array = o(nn, dl, batch, n_epochs) + + +# Saving the parameters of the network +println("Saving the parameters of the neural network...") +flush(stdout) + +params = GeometricMachineLearning.map_to_cpu(nn.params) + +save(path_out,"parameters", params, "training loss", loss_array, "ni_dim", ni_dim, "T", T, "nt", nt, "n_epochs", n_epochs, "width", width, "nhidden", nhidden, "batch_size", batch_size, "nu", nu) + +println(" ...Done!") +flush(stdout) + + diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl index e409ffa4..6fa21075 100644 --- a/src/architectures/forced_generalized_hamiltonian_neural_network.jl +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -12,7 +12,7 @@ struct ForcedGeneralizedHamiltonianArchitecture{FT, AT, PT <: OptionalParameters activation::AT function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters(), forcing_type::Symbol=:P) - forcing_type == :P || forcing_type == :Q || error("Forcing has to be either :Q or :P. It is $(forcing_type).") + forcing_type == :P || forcing_type == :Q || forcing_type == :QP || error("Forcing has to be either :Q or :P. It is $(forcing_type).") activation = (typeof(activation) <: Activation) ? 
activation : Activation(activation) new{forcing_type, typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) end diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index 26a17cb0..166b0e96 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -40,7 +40,7 @@ struct ForcedSympNet{FT, AT} <: NeuralNetworkIntegrator n_layers = g_n_layers_default, activation = g_activation_default, init_upper = g_init_upper_default, - forcing_type::Symbol = :P) + forcing_type::Symbol = :P) new{forcing_type, typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, activation, init_upper) end end diff --git a/src/layers/forcing_dissipation_layers.jl b/src/layers/forcing_dissipation_layers.jl index 34afd564..c816a1eb 100644 --- a/src/layers/forcing_dissipation_layers.jl +++ b/src/layers/forcing_dissipation_layers.jl @@ -36,24 +36,31 @@ end """ ForcingLayerQ -A layer that is derived from the more general [`ForcingLayer`](@ref) and only changes the ``q`` component. +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``q`` component. """ const ForcingLayerQ{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :Q, ReturnParameters} """ ForcingLayerP -A layer that is derived from the more general [`ForcingLayer`](@ref) and only changes the ``p`` component. +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``p`` component. """ const ForcingLayerP{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :P, ReturnParameters} -function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_length::Integer, activation) +""" + ForcingLayerQP + +A layer that is derived from the more general [`ForcingLayer`](@ref) and the resulting forcing only depends on the ``q`` and the ``p`` component. +""" +const ForcingLayerQP{M, N, FT, AT, ReturnParameters} = ForcingLayer{M, N, FT, AT, :QP, ReturnParameters} + +function build_chain(dim::Integer, width::Integer, nhidden::Integer, parameter_length::Integer, activation, type::Symbol) inner_layers = Tuple( [Dense(width, width, activation) for _ in 1:nhidden] ) Chain( - Dense(dim÷2 + parameter_length, width, activation), + type == :QP ? 
Dense(dim + parameter_length, width, activation) : Dense(dim÷2 + parameter_length, width, activation), inner_layers..., Linear(width, dim÷2; use_bias = false) ) @@ -62,7 +69,7 @@ end function ForcingLayer(dim::Integer, width::Integer, nhidden::Integer, activation; parameters::OptionalParameters=NullParameters(), return_parameters::Bool, type::Symbol) flattened_parameters = ParameterHandling.flatten(parameters) parameter_length = length(flattened_parameters[1]) - c = build_chain(dim, width, nhidden, parameter_length, activation) + c = build_chain(dim, width, nhidden, parameter_length, activation, type) ForcingLayer{dim, dim, typeof(flattened_parameters[2]), typeof(c), type, return_parameters}(dim, width, nhidden, parameter_length, flattened_parameters[2], c) end @@ -94,6 +101,10 @@ function ForcingLayerP(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nh ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:P) end +function ForcingLayerQP(dim::Integer, width::Integer=dim, nhidden::Integer=HNN_nhidden_default, activation=HNN_activation_default; parameters::OptionalParameters=NullParameters(), return_parameters::Bool=false) + ForcingLayer(dim, width, nhidden, activation; parameters=parameters, return_parameters=return_parameters, type=:QP) +end + function (integrator::ForcingLayerQ{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) (q = qp.q, p = qp.p + integrator.model(input, params)) @@ -104,6 +115,11 @@ function (integrator::ForcingLayerP{M, N, FT, AT, false})(qp::QPT2, problem_para (q = qp.q, p = qp.p + integrator.model(input, params)) end +function (integrator::ForcingLayerQP{M, N, FT, AT, false})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(vcat(qp.q, qp.p), problem_params) + (q = qp.q, p = qp.p + integrator.model(input, params)) +end + function (integrator::ForcingLayerQ{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} input = concatenate_array_with_parameters(qp.q, problem_params) ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) @@ -114,6 +130,11 @@ function (integrator::ForcingLayerP{M, N, FT, AT, true})(qp::QPT2, problem_param ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) end +function (integrator::ForcingLayerQP{M, N, FT, AT, true})(qp::QPT2, problem_params::OptionalParameters, params::NeuralNetworkParameters) where {M, N, FT, AT} + input = concatenate_array_with_parameters(vcat(qp.q, qp.p), problem_params) + ((q = qp.q, p = qp.p + integrator.model(input, params)), problem_params) +end + function (integrator::ForcingLayer)(qp_params::Tuple{<:QPTOAT2, <:OptionalParameters}, params::NeuralNetworkParameters) integrator(qp_params..., params) end From c563faa6fcb571617b759f2f90a306f9602d32eb Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Mon, 27 Oct 2025 17:30:37 +0100 Subject: [PATCH 48/50] Fixed reference. 
--- docs/src/GeometricMachineLearning.bib | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/src/GeometricMachineLearning.bib b/docs/src/GeometricMachineLearning.bib index dc9e18fa..6eaefd72 100644 --- a/docs/src/GeometricMachineLearning.bib +++ b/docs/src/GeometricMachineLearning.bib @@ -751,9 +751,10 @@ @article{bon2024optimal @article{kingma2014adam, title={Adam: a method for stochastic optimization}, - author={Kingma, DP}, + author={Kingma, Diederik P. and Ba, Jimmy Lei}, journal={arXiv preprint arXiv:1412.6980}, - year={2014} + year={2014}, + note={Published as a conference paper at ICLR 2015} } @article{toda1967vibration, From b0dfdd2cecb4bb4a3b9e36a809e9e98d8b1b2c0d Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 6 Nov 2025 18:46:16 +0100 Subject: [PATCH 49/50] Added another constant N_FORCING_LAYERS that determines the number of sublayers in a forcing layers, which should be independent of the total number of layers. --- ...ForcedGeneralizedHamiltonianNeuralNetwork.jl | 7 ++++++- scripts/Train_DampedOscillator_QP.jl | 2 +- ...ed_generalized_hamiltonian_neural_network.jl | 9 ++++++--- src/architectures/forced_sympnet.jl | 17 ++++++++++------- 4 files changed, 23 insertions(+), 12 deletions(-) diff --git a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl index df2c42f2..e9ca22ce 100644 --- a/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl +++ b/scripts/TimeDependentHarmonicOscillatorForcedGeneralizedHamiltonianNeuralNetwork.jl @@ -115,8 +115,10 @@ n_integrators::Int = 2 # sigmoid_linear_unit(x::T) where {T<:Number} = x / (T(1) + exp(-x)) arch1 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :P) arch2 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :Q) +arch3 = ForcedGeneralizedHamiltonianArchitecture(2; activation = tanh, width = 2width, nhidden = nhidden, n_integrators = n_integrators, parameters = turn_parameters_into_correct_format(t, IC)[1], forcing_type = :QP) nn1 = NeuralNetwork(arch1) nn2 = NeuralNetwork(arch2) +nn3 = NeuralNetwork(arch3) # This is where training starts batch_size = 128 @@ -124,13 +126,16 @@ n_epochs = 200 batch = Batch(batch_size) o1 = Optimizer(AdamOptimizer(), nn1) o2 = Optimizer(AdamOptimizer(), nn2) +o3 = Optimizer(AdamOptimizer(), nn3) loss = ParametricLoss() _pb = SymbolicPullback(nn1, loss, turn_parameters_into_correct_format(t, IC)[1]); _pb = SymbolicPullback(nn2, loss, turn_parameters_into_correct_format(t, IC)[1]); +_pb = SymbolicPullback(nn3, loss, turn_parameters_into_correct_format(t, IC)[1]); function train_network() o1(nn1, dl, batch, n_epochs, loss, _pb) o2(nn2, dl, batch, n_epochs, loss, _pb) + o3(nn3, dl, batch, n_epochs, loss, _pb) end loss_array = train_network() @@ -145,7 +150,7 @@ trajectory.q[:, 1] .= initial_conditions.q trajectory.p[:, 1] .= initial_conditions.p # note that we have to supply the parameters as a named tuple as well here: for t_step ∈ 0:(n_steps-2) - qp_temporary = nn1.model((q = [trajectory.q[1, t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn1.params) + qp_temporary = nn3.model((q = [trajectory.q[1, 
t_step+1]], p = [trajectory.p[1, t_step+1]]), (t = t[t_step+1],), nn3.params) trajectory.q[:, t_step+2] .= qp_temporary.q trajectory.p[:, t_step+2] .= qp_temporary.p end diff --git a/scripts/Train_DampedOscillator_QP.jl b/scripts/Train_DampedOscillator_QP.jl index 2ccf6b7a..1058ef94 100644 --- a/scripts/Train_DampedOscillator_QP.jl +++ b/scripts/Train_DampedOscillator_QP.jl @@ -89,7 +89,7 @@ end dl = DataLoader(turn_q_p_data_into_correct_format((q = q, p = p))) # This sets up the neural network -arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, forcing_type = :QP) +arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, forcing_type = :P) #arch = ForcedSympNet(2; upscaling_dimension = width, n_layers = nhidden, activation=(x-> max(0,x)^2/2)) nn = NeuralNetwork(arch) diff --git a/src/architectures/forced_generalized_hamiltonian_neural_network.jl b/src/architectures/forced_generalized_hamiltonian_neural_network.jl index 6fa21075..2bb97632 100644 --- a/src/architectures/forced_generalized_hamiltonian_neural_network.jl +++ b/src/architectures/forced_generalized_hamiltonian_neural_network.jl @@ -1,3 +1,5 @@ +const N_FORCING_LAYERS_DEFAULT = 2 + """ ForcedGeneralizedHamiltonianArchitecture <: HamiltonianArchitecture @@ -7,14 +9,15 @@ struct ForcedGeneralizedHamiltonianArchitecture{FT, AT, PT <: OptionalParameters dim::Int width::Int nhidden::Int + n_forcing_layers::Int n_integrators::Int parameters::PT activation::AT - function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters(), forcing_type::Symbol=:P) + function ForcedGeneralizedHamiltonianArchitecture(dim; width=dim, nhidden=HNN_nhidden_default, n_forcing_layers=N_FORCING_LAYERS_DEFAULT, n_integrators::Integer=1, activation=HNN_activation_default, parameters=NullParameters(), forcing_type::Symbol=:P) forcing_type == :P || forcing_type == :Q || forcing_type == :QP || error("Forcing has to be either :Q or :P. It is $(forcing_type).") activation = (typeof(activation) <: Activation) ? 
activation : Activation(activation) - new{forcing_type, typeof(activation), typeof(parameters)}(dim, width, nhidden, n_integrators, parameters, activation) + new{forcing_type, typeof(activation), typeof(parameters)}(dim, width, nhidden, n_forcing_layers, n_integrators, parameters, activation) end end @@ -24,7 +27,7 @@ function Chain(arch::ForcedGeneralizedHamiltonianArchitecture{FT}) where {FT} potential_energy = SymbolicPotentialEnergy(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters) for i ∈ 1:arch.n_integrators layers = (layers..., SymplecticEulerA(kinetic_energy; return_parameters = true)) - layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.nhidden, arch.activation; parameters=arch.parameters, return_parameters=true, type=FT)) + layers = (layers..., ForcingLayer(arch.dim, arch.width, arch.n_forcing_layers, arch.activation; parameters=arch.parameters, return_parameters=true, type=FT)) _return_parameters = !(i == arch.n_integrators) layers = (layers..., SymplecticEulerB(potential_energy; return_parameters = _return_parameters)) end diff --git a/src/architectures/forced_sympnet.jl b/src/architectures/forced_sympnet.jl index 166b0e96..582acca4 100644 --- a/src/architectures/forced_sympnet.jl +++ b/src/architectures/forced_sympnet.jl @@ -23,25 +23,28 @@ struct ForcedSympNet{FT, AT} <: NeuralNetworkIntegrator dim::Int upscaling_dimension::Int n_layers::Int + n_forcing_layers::Int act::AT init_upper::Bool - function ForcedSympNet(dim::Integer; + function ForcedSympNet(dim::Integer; upscaling_dimension = 2 * dim, - n_layers = g_n_layers_default, + n_layers = g_n_layers_default, + n_forcing_layers = N_FORCING_LAYERS_DEFAULT, activation = g_activation_default, init_upper = g_init_upper_default, forcing_type::Symbol = :P) - new{forcing_type, typeof(activation)}(dim, upscaling_dimension, n_layers, activation, init_upper) + new{forcing_type, typeof(activation)}(dim, upscaling_dimension, n_layers, n_forcing_layers, activation, init_upper) end - function ForcedSympNet(dl::DataLoader; + function ForcedSympNet(dl::DataLoader; upscaling_dimension = 2 * dl.input_dim, - n_layers = g_n_layers_default, + n_layers = g_n_layers_default, + n_forcing_layers = N_FORCING_LAYERS_DEFAULT, activation = g_activation_default, init_upper = g_init_upper_default, forcing_type::Symbol = :P) - new{forcing_type, typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, activation, init_upper) + new{forcing_type, typeof(activation)}(dl.input_dim, upscaling_dimension, n_layers, n_forcing_layers, activation, init_upper) end end @@ -54,7 +57,7 @@ function Chain(arch::ForcedSympNet{FT}) where {FT} (layers..., GradientLayerQ(arch.dim, arch.upscaling_dimension, arch.act)) else (layers..., - ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_layers, arch.act; return_parameters=false, type=FT), + ForcingLayer(arch.dim, arch.upscaling_dimension, arch.n_forcing_layers, arch.act; return_parameters=false, type=FT), GradientLayerP(arch.dim, arch.upscaling_dimension, arch.act)) end end From 7b10ae2144b0a04522c19622228a0d2f73a9f6c2 Mon Sep 17 00:00:00 2001 From: benedict-96 Date: Thu, 11 Dec 2025 09:43:20 +0100 Subject: [PATCH 50/50] Added script to determine parameters in forcing layers for :QP and :P. 
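The two counts are expected to differ only in the first dense layer of the forcing network: in `build_chain` the `:QP` variant takes the full ``(q, p)`` input (`dim` columns) while the `:P` variant only sees ``p`` (`dim ÷ 2` columns), and the bias length is unaffected by the input size. Under that assumption the difference for the defaults used in the script (`dim = 2`, `width = dim`) should be

```math
\mathtt{width} \cdot (\mathtt{dim} - \mathtt{dim} \div 2) = 2 \cdot (2 - 1) = 2
```

additional weights.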
--- scripts/forcing_layers_parameter_number.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 scripts/forcing_layers_parameter_number.jl diff --git a/scripts/forcing_layers_parameter_number.jl b/scripts/forcing_layers_parameter_number.jl new file mode 100644 index 00000000..e27376d0 --- /dev/null +++ b/scripts/forcing_layers_parameter_number.jl @@ -0,0 +1,11 @@ +using GeometricMachineLearning +using GeometricMachineLearning: ForcingLayerP, ForcingLayerQP + +forcing_layer_p = ForcingLayerP(2) +forcing_layer_qp = ForcingLayerQP(2) + +nn_p = NeuralNetwork(forcing_layer_p) +nn_qp = NeuralNetwork(forcing_layer_qp) + +println(parameterlength(nn_p)) +println(parameterlength(nn_qp)) \ No newline at end of file