diff --git a/examples/input.nn.recommended b/examples/input.nn.recommended
index e53934e0f..a0030bf52 100644
--- a/examples/input.nn.recommended
+++ b/examples/input.nn.recommended
@@ -35,6 +35,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/Cu2S_PBE/input.nn b/examples/nnp-train/Cu2S_PBE/input.nn
index b4aa9333e..23eccf6db 100644
--- a/examples/nnp-train/Cu2S_PBE/input.nn
+++ b/examples/nnp-train/Cu2S_PBE/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/H2O_RPBE-D3/input.nn b/examples/nnp-train/H2O_RPBE-D3/input.nn
index b2666eecd..05b3fcacb 100644
--- a/examples/nnp-train/H2O_RPBE-D3/input.nn
+++ b/examples/nnp-train/H2O_RPBE-D3/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/LJ/input.nn b/examples/nnp-train/LJ/input.nn
index d42118f4b..2a1812341 100644
--- a/examples/nnp-train/LJ/input.nn
+++ b/examples/nnp-train/LJ/input.nn
@@ -34,6 +34,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/src/application/makefile b/src/application/makefile
index c8a2b6529..29b260a37 100644
--- a/src/application/makefile
+++ b/src/application/makefile
@@ -47,7 +47,8 @@ APP_CORE=nnp-convert \
          nnp-predict \
          nnp-prune \
          nnp-select \
-         nnp-symfunc
+         nnp-symfunc \
+         nnx

 # Applications which require libnnp and libnnptrain:
 # (nnp-train itself is actually a special case).
@@ -55,7 +56,8 @@ APP_TRAIN=nnp-comp2 \
           nnp-dataset \
           nnp-norm \
           nnp-scaling \
-          nnp-sfclust
+          nnp-sfclust \
+          nnp-winit

 # All applications together.
 APP=$(APP_CORE) $(APP_TRAIN) nnp-train
@@ -72,7 +74,7 @@ CLEAN_APP=$(patsubst %, clean-%, $(APP))
 all: $(APP_CORE) $(APP_TRAIN) nnp-train

 # Applications which require only libnnp:
-$(APP_CORE): INCLUDES=-I./ -I$(PROJECT_INCLUDE)/
+$(APP_CORE): INCLUDES=-I./ -I$(PROJECT_INCLUDE)/ -I$(PROJECT_EIGEN)
 ifeq ($(MODE), shared)
 $(APP_CORE): LDFLAGS=-L$(PROJECT_LIB) -lnnp
 else
diff --git a/src/application/nnp-winit.cpp b/src/application/nnp-winit.cpp
new file mode 100644
index 000000000..285bb7612
--- /dev/null
+++ b/src/application/nnp-winit.cpp
@@ -0,0 +1,308 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#include "Log.h"
+#include "NeuralNetwork.h"
+#include "utility.h"
+#include <algorithm> // std::fill, std::generate
+#include <cmath>     // sqrt
+#include <cstdio>    // FILE, fopen, fclose
+#include <fstream>   // std::ofstream
+#include <iostream>  // std::cout
+#include <random>    // std::mt19937_64
+#include <string>    // std::string
+#include <vector>    // std::vector
+#include <gsl/gsl_histogram.h>
+
+using namespace std;
+using namespace nnp;
+
+int main(int argc, char* argv[])
+{
+    string activationString = "l";
+    int numNeuronsPerLayer = 10;
+    size_t numBins = 1000;
+    size_t numLayers = 7;
+    vector<double> meanActivation(numLayers, 0.0);
+    vector<double> sigmaActivation(numLayers, 0.0);
+    vector<size_t> countActivation(numLayers, 0);
+    vector<double> meanGradient(numLayers - 1, 0.0);
+    vector<double> sigmaGradient(numLayers - 1, 0.0);
+    vector<size_t> countGradient(numLayers - 1, 0);
+
+    mt19937_64 rng;
+    rng.seed(0);
+    uniform_real_distribution<> distUniform(-1.0, 1.0);
+    normal_distribution<> distNormal(0.0, 0.3);
+    auto generatorUniform = [&distUniform, &rng]() {return distUniform(rng);};
+    auto generatorNormal = [&distNormal, &rng]() {return distNormal(rng);};
+
+    if (argc != 2)
+    {
+        cout << "USAGE: " << argv[0] << " <type>\n"
+             << "       <type> ... Activation function type.\n";
+        return 1;
+    }
+
+    activationString = argv[1];
+
+    ofstream logFile;
+    logFile.open("nnp-winit.log");
+    Log log;
+    log.registerStreamPointer(&logFile);
+    log << "\n";
+    log << "*** WEIGHT INITIALIZATION TESTING *******"
+           "**************************************\n";
+    log << "\n";
+
+    NeuralNetworkTopology t;
+    t.numLayers = numLayers;
+    t.activationFunctionsPerLayer.resize(t.numLayers);
+    fill(t.activationFunctionsPerLayer.begin(),
+         t.activationFunctionsPerLayer.end() - 1,
+         activationFromString(activationString));
+    t.activationFunctionsPerLayer.back() = activationFromString("l");
+    t.numNeuronsPerLayer.resize(t.numLayers);
+    fill(t.numNeuronsPerLayer.begin(),
+         t.numNeuronsPerLayer.end() - 1,
+         numNeuronsPerLayer);
+    t.numNeuronsPerLayer.back() = 1;
+
+    NeuralNetwork nn(t.numLayers,
+                     t.numNeuronsPerLayer.data(),
+                     t.activationFunctionsPerLayer.data());
+
+    log << nn.info();
+
+    vector<gsl_histogram*> histActivation;
+    vector<gsl_histogram*> histGradient;
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        histActivation.push_back(gsl_histogram_alloc(numBins));
+        gsl_histogram_set_ranges_uniform(histActivation.back(), -1.0, 1.0);
+        if (i < numLayers - 1)
+        {
+            histGradient.push_back(gsl_histogram_alloc(numBins));
+            gsl_histogram_set_ranges_uniform(histGradient.back(), -1.0, 1.0);
+        }
+    }
+
+    for (size_t i = 0; i < 100000; ++i)
+    {
+        vector<double> input(numNeuronsPerLayer);
+        generate(input.begin(), input.end(), generatorNormal);
+        nn.setInput(input.data());
+
+        vector<double> connections(nn.getNumConnections());
+        generate(connections.begin(), connections.end(), generatorNormal);
+        nn.setConnections(connections.data());
+        nn.modifyConnections(NeuralNetwork::MS_GLOROTBENGIO_NORMAL);
+        //nn.modifyConnections(NeuralNetwork::MS_FANIN_NORMAL);
+        nn.modifyConnections(NeuralNetwork::MS_ZEROBIAS);
+
+        nn.propagate();
+
+        auto activations = nn.getNeuronsPerLayer();
+        auto gradient = nn.getGradientsPerLayer();
+
+        for (size_t il = 0; il < numLayers; ++il)
+        {
+            auto const& l = activations.at(il);
+            for (auto const& a : l)
+            {
+                gsl_histogram_increment(histActivation.at(il), a);
+                // Welford's algorithm (use sigma as M2 storage).
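+                // Single-pass mean/variance update: with c the updated
+                // sample count, mean += (a - mean) / c, and the "sigma"
+                // vector accumulates M2 += (a - mean_old) * (a - mean_new);
+                // the standard deviation is recovered below as
+                // sqrt(M2 / (c - 1)).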
+                countActivation.at(il)++;
+                double const delta = a - meanActivation.at(il);
+                meanActivation.at(il) += delta / countActivation.at(il);
+                double const delta2 = a - meanActivation.at(il);
+                sigmaActivation.at(il) += delta * delta2;
+            }
+            if (il < numLayers - 1)
+            {
+                auto const& l = gradient.at(il);
+                for (auto const& g : l)
+                {
+                    gsl_histogram_increment(histGradient.at(il), g);
+                    countGradient.at(il)++;
+                    double const delta = g - meanGradient.at(il);
+                    meanGradient.at(il) += delta / countGradient.at(il);
+                    double const delta2 = g - meanGradient.at(il);
+                    sigmaGradient.at(il) += delta * delta2;
+                }
+            }
+        }
+    }
+
+    //ofstream f;
+    //f.open("weights.dat");
+    //nn.writeConnections(f);
+    //f.close();
+
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        sigmaActivation.at(i) = sqrt(sigmaActivation.at(i)
+                                   / (countActivation.at(i) - 1));
+        if (i < numLayers - 1)
+        {
+            sigmaGradient.at(i) = sqrt(sigmaGradient.at(i)
+                                     / (countGradient.at(i) - 1));
+        }
+    }
+
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Layer      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12zu", i + 1);
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Activation |";
+    log << strpr(" %12s", "G");
+    for (size_t i = 1; i < numLayers; ++i)
+    {
+        log << strpr(" %12s", stringFromActivation(
+                                  t.activationFunctionsPerLayer[i]).c_str());
+    }
+    log << "\n";
+    log << "Count      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.3E", static_cast<double>(countActivation.at(i)));
+    }
+    log << "\n";
+    log << "Mean       |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.5f", meanActivation.at(i));
+    }
+    log << "\n";
+    log << "Sigma      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.5f", sigmaActivation.at(i));
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Gradient\n";
+    log << "Count      |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.3E", static_cast<double>(countGradient.at(i)));
+    }
+    log << "\n";
+    log << "Mean       |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.5f", meanGradient.at(i));
+    }
+    log << "\n";
+    log << "Sigma      |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.5f", sigmaGradient.at(i));
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+
+    log << strpr("Writing activation histograms for %zu layers.\n", numLayers);
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+
+        string fileName = strpr("activation.%03zu.out", i + 1);
+        FILE* fp = 0;
+        fp = fopen(fileName.c_str(), "w");
+
+        // File header.
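+        // (The block below follows the usual n2p2 column-file convention:
+        // a title line plus width, name and description for each column,
+        // assembled by createFileHeader() and written by
+        // appendLinesToFile().)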
+        vector<string> title;
+        vector<string> colName;
+        vector<string> colInfo;
+        vector<size_t> colSize;
+        title.push_back(strpr("Activations histogram for layer %zu with "
+                              "activation function %s.",
+                              i + 1,
+                              stringFromActivation(
+                                  t.activationFunctionsPerLayer[i]).c_str()));
+        colSize.push_back(16);
+        colName.push_back("activation_l");
+        colInfo.push_back("Activation value, left bin limit.");
+        colSize.push_back(16);
+        colName.push_back("activation_r");
+        colInfo.push_back("Activation value, right bin limit.");
+        colSize.push_back(16);
+        colName.push_back("hist");
+        colInfo.push_back("Histogram count.");
+        appendLinesToFile(fp,
+                          createFileHeader(title,
+                                           colSize,
+                                           colName,
+                                           colInfo));
+        gsl_histogram_fprintf(fp, histActivation.at(i), "%16.8E", "%16.8E");
+        fflush(fp);
+        fclose(fp);
+        fp = nullptr;
+    }
+
+    log << strpr("Writing gradient histograms for %zu layers.\n",
+                 numLayers - 1);
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+
+        string fileName = strpr("gradient.%03zu.out", i + 2);
+        FILE* fp = 0;
+        fp = fopen(fileName.c_str(), "w");
+
+        // File header.
+        vector<string> title;
+        vector<string> colName;
+        vector<string> colInfo;
+        vector<size_t> colSize;
+        title.push_back(strpr("Gradient histogram for layer %zu.", i + 2));
+        colSize.push_back(16);
+        colName.push_back("gradient_l");
+        colInfo.push_back("Gradient value, left bin limit.");
+        colSize.push_back(16);
+        colName.push_back("gradient_r");
+        colInfo.push_back("Gradient value, right bin limit.");
+        colSize.push_back(16);
+        colName.push_back("hist");
+        colInfo.push_back("Histogram count.");
+        appendLinesToFile(fp,
+                          createFileHeader(title,
+                                           colSize,
+                                           colName,
+                                           colInfo));
+        gsl_histogram_fprintf(fp, histGradient.at(i), "%16.8E", "%16.8E");
+        fflush(fp);
+        fclose(fp);
+        fp = nullptr;
+    }
+
+    log << "Finished.\n";
+    log << "*****************************************"
+           "**************************************\n";
+    logFile.close();
+
+    return 0;
+}
diff --git a/src/application/nnx.cpp b/src/application/nnx.cpp
new file mode 100644
index 000000000..95c765b96
--- /dev/null
+++ b/src/application/nnx.cpp
@@ -0,0 +1,40 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
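+
+// Minimal driver for the experimental Eigen-based NeuralNetworx class: it
+// constructs a small 10-15-15-3 test network from a vector of layer sizes
+// plus a matching vector of activation types and prints the resulting
+// architecture summary.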
+ +#include "NeuralNetworx.h" +#include +#include +#include +#include + +using namespace std; +using namespace nnp; + +int main(int argc, char* argv[]) +{ + vector numNeuronsPerLayer = {10, 15, 15, 3}; + //vector activationPerLayer = {"G", "t", "s", "l"}; + //NeuralNetworx nn(numNeuronsPerLayer, activationPerLayer); + vector activationPerLayer = {NeuralNetworx::Activation::IDENTITY, + NeuralNetworx::Activation::TANH, + NeuralNetworx::Activation::LOGISTIC, + NeuralNetworx::Activation::IDENTITY}; + NeuralNetworx nn(numNeuronsPerLayer, activationPerLayer); + + for (auto l : nn.info()) cout << l; + + return 0; +} diff --git a/src/libnnp/Mode.cpp b/src/libnnp/Mode.cpp index 475ec8ff3..b2d0cd548 100644 --- a/src/libnnp/Mode.cpp +++ b/src/libnnp/Mode.cpp @@ -758,51 +758,7 @@ void Mode::setupNeuralNetwork() { t.numNeuronsPerLayer[i] = atoi(globalNumNeuronsPerHiddenLayer.at(i-1).c_str()); - if (globalActivationFunctions.at(i-1) == "l") - { - a = NeuralNetwork::AF_IDENTITY; - } - else if (globalActivationFunctions.at(i-1) == "t") - { - a = NeuralNetwork::AF_TANH; - } - else if (globalActivationFunctions.at(i-1) == "s") - { - a = NeuralNetwork::AF_LOGISTIC; - } - else if (globalActivationFunctions.at(i-1) == "p") - { - a = NeuralNetwork::AF_SOFTPLUS; - } - else if (globalActivationFunctions.at(i-1) == "r") - { - a = NeuralNetwork::AF_RELU; - } - else if (globalActivationFunctions.at(i-1) == "g") - { - a = NeuralNetwork::AF_GAUSSIAN; - } - else if (globalActivationFunctions.at(i-1) == "c") - { - a = NeuralNetwork::AF_COS; - } - else if (globalActivationFunctions.at(i-1) == "S") - { - a = NeuralNetwork::AF_REVLOGISTIC; - } - else if (globalActivationFunctions.at(i-1) == "e") - { - a = NeuralNetwork::AF_EXP; - } - else if (globalActivationFunctions.at(i-1) == "h") - { - a = NeuralNetwork::AF_HARMONIC; - } - else - { - throw runtime_error("ERROR: Unknown activation " - "function.\n"); - } + a = activationFromString(globalActivationFunctions.at(i-1)); } } } @@ -884,7 +840,9 @@ void Mode::setupNeuralNetworkWeights(string const& fileNameFormat) weights.push_back(atof(splitLine.at(0).c_str())); } } - it->neuralNetwork->setConnections(&(weights.front())); + // Attention: need alternative (old) ordering scheme here for + // backward compatibility! 
+        it->neuralNetwork->setConnectionsAO(&(weights.front()));

         file.close();
     }
diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp
index e7b7808c6..6271bcf94 100644
--- a/src/libnnp/NeuralNetwork.cpp
+++ b/src/libnnp/NeuralNetwork.cpp
@@ -80,12 +80,26 @@ NeuralNetwork(int numLayers,
         weightOffset[i] = weightOffset[i-1]
                         + (layers[i-1].numNeurons + 1) * layers[i].numNeurons;
     }
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    biasOffset = new int*[numLayers-1];
+    for (int i = 0; i < numLayers-1; i++)
+    {
+        biasOffset[i] = new int[layers[i+1].numNeurons];
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            biasOffset[i][j] = weightOffset[i]
+                             + (layers[i+1].numNeuronsPrevLayer + 1)
+                             * (j + 1) - 1;
+        }
+    }
+#else
     biasOffset = new int[numLayers-1];
     for (int i = 0; i < numLayers-1; i++)
     {
         biasOffset[i] = weightOffset[i]
                       + layers[i+1].numNeurons * layers[i].numNeurons;
     }
+#endif
     biasOnlyOffset = new int[numLayers-1];
     biasOnlyOffset[0] = 0;
     for (int i = 1; i < numLayers-1; i++)
@@ -106,7 +120,15 @@ NeuralNetwork::~NeuralNetwork()
     }
     delete[] layers;
     delete[] weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        delete[] biasOffset[i];
+    }
+    delete[] biasOffset;
+#else
     delete[] biasOffset;
+#endif
     delete[] biasOnlyOffset;
 }
@@ -148,6 +170,27 @@ void NeuralNetwork::setConnections(double const* const& connections)
 {
     int count = 0;

+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                layers[i].neurons[j].weights[k] = connections[count];
+                count++;
+            }
+            layers[i].neurons[j].bias = connections[count];
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::setConnectionsAO(double const* const& connections)
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -172,6 +215,27 @@ void NeuralNetwork::getConnections(double* connections) const
 {
     int count = 0;

+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                connections[count] = layers[i].neurons[j].weights[k];
+                count++;
+            }
+            connections[count] = layers[i].neurons[j].bias;
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::getConnectionsAO(double* connections) const
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -231,36 +295,119 @@ void NeuralNetwork::modifyConnections(ModificationScheme modificationScheme)
             }
         }
     }
-    else if (modificationScheme == MS_FANIN)
+    else if (modificationScheme == MS_FANIN_UNIFORM)
     {
         for (int i = 1; i < numLayers; i++)
         {
-            if(layers[i].activationFunction == AF_TANH)
+            for (int j = 0; j < layers[i].numNeurons; j++)
             {
-                for (int j = 0; j < layers[i].numNeurons; j++)
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
                 {
-                    for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                    if(layers[i].activationFunction == AF_TANH)
                     {
-                        layers[i].neurons[j].weights[k] /=
-                            sqrt(layers[i].numNeuronsPrevLayer);
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(3.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(3.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(6.0 / layers[i].numNeuronsPrevLayer);
                     }
                 }
             }
        }
     }
-    else if (modificationScheme == MS_GLOROTBENGIO)
+    else if (modificationScheme == MS_FANIN_NORMAL)
     {
         for (int i = 1; i < numLayers; i++)
         {
-            if(layers[i].activationFunction == AF_TANH)
+            for (int j = 0; j < layers[i].numNeurons; j++)
             {
-                for (int j = 0; j < layers[i].numNeurons; j++)
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
                 {
-                    for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(1.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(1.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
                     {
-                        layers[i].neurons[j].weights[k] *= sqrt(6.0 / (
-                            layers[i].numNeuronsPrevLayer
-                            + layers[i].numNeurons));
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(2.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                }
+            }
+        }
+    }
+    else if (modificationScheme == MS_GLOROTBENGIO_UNIFORM)
+    {
+        for (int i = 1; i < numLayers; i++)
+        {
+            for (int j = 0; j < layers[i].numNeurons; j++)
+            {
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                {
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(6.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(6.0 / (layers[i].numNeuronsPrevLayer
+                                            + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(12.0 / (layers[i].numNeuronsPrevLayer
+                                       + layers[i].numNeurons));
+                    }
+                }
+            }
+        }
+    }
+    else if (modificationScheme == MS_GLOROTBENGIO_NORMAL)
+    {
+        for (int i = 1; i < numLayers; i++)
+        {
+            for (int j = 0; j < layers[i].numNeurons; j++)
+            {
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                {
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(2.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(2.0 / (layers[i].numNeuronsPrevLayer
+                                            + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(4.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
                     }
                 }
             }
@@ -368,6 +515,42 @@ void NeuralNetwork::getOutput(double* output) const
     return;
 }

+vector<vector<double>> NeuralNetwork::getNeuronsPerLayer() const
+{
+    vector<vector<double>> output;
+
+    for (int i = 0; i < numLayers; ++i)
+    {
+        output.push_back(vector<double>());
+        for (int j = 0; j < layers[i].numNeurons; ++j)
+        {
+            output.back().push_back(layers[i].neurons[j].value);
+        }
+    }
+
+    return output;
+}
+
+vector<vector<double>> NeuralNetwork::getGradientsPerLayer() const
+{
+    vector<vector<double>> output;
+
+    double* dEdc = new double[getNumConnections()];
+    calculateDEdc(dEdc);
+
+    for (int i = 0; i < numLayers - 2; ++i)
+    {
+        output.push_back(vector<double>(dEdc + weightOffset[i],
+                                        dEdc + weightOffset[i + 1]));
+    }
+    output.push_back(vector<double>(dEdc + weightOffset[numLayers - 2],
+                                    dEdc + getNumConnections()));
+
+    delete[] dEdc;
+
+    return output;
+}
+
 void NeuralNetwork::propagate()
 {
     for (int i = 1; i < numLayers; i++)
@@ -428,6 +611,52 @@ void NeuralNetwork::calculateDEdG(double *dEdG) const

 void NeuralNetwork::calculateDEdc(double* dEdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < numConnections; i++)
+    {
+        dEdc[i] = 0.0;
+    }
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        dEdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].dfdx;
+        if (normalizeNeurons)
+        {
+            dEdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers - 2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[weightOffset[i]+count] = dEdc[biasOffset[i][j]]
+                                            * layers[i].neurons[k].value;
+                count++;
+                if (i >= 1)
+                {
+                    dEdc[biasOffset[i-1][k]] += dEdc[biasOffset[i][j]]
+                        * layers[i+1].neurons[j].weights[k]
+                        * layers[i].neurons[k].dfdx;
+                }
+            }
+            count++;
+        }
+        if (normalizeNeurons && i >= 1)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;

     for (int i = 0; i < numConnections; i++)
@@ -468,6 +697,7 @@ void NeuralNetwork::calculateDEdc(double* dEdc) const
             }
         }
     }
+#endif

     return;
 }
@@ -512,6 +742,67 @@ void NeuralNetwork::calculateDFdc(double* dFdc,
 }

 void NeuralNetwork::writeConnections(std::ofstream& file) const
+{
+    // File header.
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    int count = 0;
+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              layers[i].neurons[j].weights[k],
+                              count,
+                              i - 1,
+                              k + 1,
+                              i,
+                              j + 1);
+            }
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          layers[i].neurons[j].bias,
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::writeConnectionsAO(std::ofstream& file) const
 {
     // File header.
     vector<string> title;
@@ -645,6 +936,67 @@ void NeuralNetwork::calculateD2EdGdc(int index,
                                      double const* const& dEdb,
                                      double* d2EdGdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        d2EdGdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].d2fdx2
+                                            * outputLayer->neurons[i].dxdG;
+        if (normalizeNeurons)
+        {
+            d2EdGdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers-2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                if (i == 0)
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                                                   * layers[i].neurons[k].value;
+                    if (k == index)
+                    {
+                        d2EdGdc[weightOffset[i]+count] +=
+                            dEdb[biasOnlyOffset[i]+j];
+                    }
+                }
+                else
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                        * layers[i].neurons[k].value
+                        + dEdb[biasOnlyOffset[i]+j] * layers[i].neurons[k].dfdx
+                        * layers[i].neurons[k].dxdG;
+                }
+                count++;
+                if (i >= 1)
+                {
+                    d2EdGdc[biasOffset[i-1][k]] +=
+                        layers[i+1].neurons[j].weights[k]
+                        * (d2EdGdc[biasOffset[i][j]]
+                           * layers[i].neurons[k].dfdx
+                           + dEdb[biasOnlyOffset[i]+j]
+                           * layers[i].neurons[k].d2fdx2
+                           * layers[i].neurons[k].dxdG);
+                }
+            }
+            count++;
+        }
+        for (int k = 0; k < layers[i].numNeurons; k++)
+        {
+            if (normalizeNeurons && i >= 1)
+            {
+                d2EdGdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;

     for (int i = 0; i < outputLayer->numNeurons; i++)
@@ -699,6 +1051,7 @@ void NeuralNetwork::calculateD2EdGdc(int index,
             }
         }
     }
+#endif

     return;
 }
@@ -921,6 +1274,45 @@ void NeuralNetwork::getNeuronStatistics(long* count,
     return;
 }

+vector<pair<size_t, size_t>> NeuralNetwork::getLayerLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (int i = 0; i < numLayers - 2; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   weightOffset[i + 1] - 1));
+    }
+    limits.push_back(make_pair(weightOffset[numLayers - 2],
+                               numConnections - 1));
+
+    return limits;
+}
+
+vector<pair<size_t, size_t>> NeuralNetwork::getNeuronLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   biasOffset[i][0]));
+        for (int j = 0; j < layers[i+1].numNeurons - 1; ++j)
+        {
+            limits.push_back(make_pair(biasOffset[i][j] + 1,
+                                       biasOffset[i][j+1]));
+        }
+    }
+#else
+    throw runtime_error("ERROR: Neuron boundaries not available with "
+                        "alternative (old) weight memory layout, recompile "
+                        "without -DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+
+    return limits;
+}
+
 /*
 void NeuralNetwork::writeStatus(int element, int epoch)
 {
@@ -969,7 +1361,15 @@ long NeuralNetwork::getMemoryUsage()
     int numNeurons = getNumNeurons();

     mem += (numLayers - 1) * sizeof(int);  // weightOffset
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    mem += (numLayers - 1) * sizeof(int*); // biasOffset
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        mem += layers[i].numNeurons * sizeof(int);
+    }
+#else
     mem += (numLayers - 1) * sizeof(int);  // biasOffset
+#endif
     mem += (numLayers - 1) * sizeof(int);  // biasOnlyOffset
     mem += numLayers * sizeof(Layer);      // layers
     mem += numNeurons * sizeof(Neuron);    // neurons
@@ -1008,45 +1408,11 @@ vector<string> NeuralNetwork::info() const
         {
             s += strpr(" %3s", "G");
         }
-        else if (layers[j].activationFunction == AF_IDENTITY)
-        {
-            s += strpr(" %3s", "l");
-        }
-        else if (layers[j].activationFunction == AF_TANH)
-        {
-            s += strpr(" %3s", "t");
-        }
-        else if (layers[j].activationFunction == AF_LOGISTIC)
-        {
%3s", "s"); - } - else if (layers[j].activationFunction == AF_SOFTPLUS) - { - s += strpr(" %3s", "p"); - } - else if (layers[j].activationFunction == AF_RELU) - { - s += strpr(" %3s", "r"); - } - else if (layers[j].activationFunction == AF_GAUSSIAN) - { - s += strpr(" %3s", "g"); - } - else if (layers[j].activationFunction == AF_COS) - { - s += strpr(" %3s", "c"); - } - else if (layers[j].activationFunction == AF_REVLOGISTIC) - { - s += strpr(" %3s", "S"); - } - else if (layers[j].activationFunction == AF_EXP) - { - s += strpr(" %3s", "e"); - } - else if (layers[j].activationFunction == AF_HARMONIC) + else { - s += strpr(" %3s", "h"); + s += strpr(" %3s", + stringFromActivation( + layers[j].activationFunction).c_str()); } } else @@ -1059,3 +1425,47 @@ vector NeuralNetwork::info() const return v; } + +string nnp::stringFromActivation(NeuralNetwork::ActivationFunction af) +{ + string c = ""; + + if (af == NeuralNetwork::AF_IDENTITY) c = "l"; + else if (af == NeuralNetwork::AF_TANH) c = "t"; + else if (af == NeuralNetwork::AF_LOGISTIC) c = "s"; + else if (af == NeuralNetwork::AF_SOFTPLUS) c = "p"; + else if (af == NeuralNetwork::AF_RELU) c = "r"; + else if (af == NeuralNetwork::AF_GAUSSIAN) c = "g"; + else if (af == NeuralNetwork::AF_COS) c = "c"; + else if (af == NeuralNetwork::AF_REVLOGISTIC) c = "S"; + else if (af == NeuralNetwork::AF_EXP) c = "e"; + else if (af == NeuralNetwork::AF_HARMONIC) c = "h"; + else + { + throw runtime_error("ERROR: Unknown activation function.\n"); + } + + return c; +} + +NeuralNetwork::ActivationFunction nnp::activationFromString(string c) +{ + NeuralNetwork::ActivationFunction af; + + if (c == "l") af = NeuralNetwork::AF_IDENTITY; + else if (c == "t") af = NeuralNetwork::AF_TANH; + else if (c == "s") af = NeuralNetwork::AF_LOGISTIC; + else if (c == "p") af = NeuralNetwork::AF_SOFTPLUS; + else if (c == "r") af = NeuralNetwork::AF_RELU; + else if (c == "g") af = NeuralNetwork::AF_GAUSSIAN; + else if (c == "c") af = NeuralNetwork::AF_COS; + else if (c == "S") af = NeuralNetwork::AF_REVLOGISTIC; + else if (c == "e") af = NeuralNetwork::AF_EXP; + else if (c == "h") af = NeuralNetwork::AF_HARMONIC; + else + { + throw runtime_error("ERROR: Unknown activation function.\n"); + } + + return af; +} diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index c56c58e9f..842a4ff80 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -17,8 +17,10 @@ #ifndef NEURALNETWORK_H #define NEURALNETWORK_H +#include // std::size_t #include // std::ofstream #include // std::string +#include // std::pair #include // std::vector namespace nnp @@ -60,32 +62,64 @@ class NeuralNetwork MS_ZEROBIAS, /// Set all weights connecting to the output layer to zero. MS_ZEROOUTPUTWEIGHTS, + /** Normalize weights via number of neuron inputs (fan-in). + * + * If initial weights are uniformly distributed in + * @f$\left[-1, 1\right]@f$ they will be scaled to be in + * @f$\left[-\sqrt{\frac{3}{n_\text{in}}}, + * \sqrt{\frac{3}{n_\text{in}}}\right]@f$, where @f$n_\text{in}@f$ is + * the number of incoming weights of a neuron. The interval is + * valid for #AF_TANH and is slightly modified for #AF_LOGISTIC, + * #AF_RELU and #AF_SOFTPLUS. Weights are not modified for other + * activation functions. + */ + MS_FANIN_UNIFORM, /** Normalize weights via number of neuron inputs (fan-in). 
          *
          * If initial weights are uniformly distributed in
          * @f$\left[-1, 1\right]@f$ they will be scaled to be in
          * @f$\left[\frac{-1}{\sqrt{n_\text{in}}},
          * \frac{1}{\sqrt{n_\text{in}}}\right]@f$, where @f$n_\text{in}@f$ is
-         * the number of incoming weights of a neuron (if activation
-         * function is of type #AF_TANH).
+         * the number of incoming weights of a neuron. The interval is
+         * valid for #AF_TANH and is slightly modified for #AF_LOGISTIC,
+         * #AF_RELU and #AF_SOFTPLUS. Weights are not modified for other
+         * activation functions.
          */
-        MS_FANIN,
+        MS_FANIN_NORMAL,
         /** Normalize connections according to Glorot and Bengio.
          *
          * If initial weights are uniformly distributed in
          * @f$\left[-1, 1\right]@f$ they will be scaled to be in
          * @f$\left[-\sqrt{\frac{6}{n_\text{in} + n_\text{out}}},
          * \sqrt{\frac{6}{n_\text{in} + n_\text{out}}}\right]@f$, where
-         * @f$n_\text{in}@f$ and @f$n_\text{out}@f$ are the number of incoming
-         * and outgoing weights of a neuron, respectively (if activation
-         * function is of type #AF_TANH).
+         * @f$n_\text{in}@f$ is the number of incoming weights and
+         * @f$n_\text{out}@f$ is the number of neurons in this layer.
+         * The interval is valid for #AF_TANH and is slightly modified for
+         * #AF_LOGISTIC, #AF_RELU and #AF_SOFTPLUS.
          *
          * For details see:
          *  - X. Glorot and Y. Bengio, "Understanding the difficulty of
          *    training deep feedforward neural networks", International
          *    conference on artificial intelligence and statistics. 2010.
          */
-        MS_GLOROTBENGIO,
+        MS_GLOROTBENGIO_UNIFORM,
+        /** Normalize connections according to Glorot and Bengio.
+         *
+         * If initial weights are normally distributed with zero mean and
+         * unit standard deviation they will be scaled by the factor
+         * @f$\sqrt{\frac{2}{n_\text{in} + n_\text{out}}}@f$, where
+         * @f$n_\text{in}@f$ is the number of incoming weights and
+         * @f$n_\text{out}@f$ is the number of neurons in this layer.
+         * The factor is valid for #AF_TANH and is slightly modified for
+         * #AF_LOGISTIC, #AF_RELU and #AF_SOFTPLUS.
+         *
+         * For details see:
+         *  - X. Glorot and Y. Bengio, "Understanding the difficulty of
+         *    training deep feedforward neural networks", International
+         *    conference on artificial intelligence and statistics. 2010.
+         */
+        MS_GLOROTBENGIO_NORMAL,
         /** Initialize connections according to Nguyen-Widrow scheme.
          *
          * For details see:
@@ -152,29 +186,70 @@ class NeuralNetwork
      * @f[
      * \underbrace{
      * \overbrace{
-     * a^{01}_{00}, \ldots, a^{01}_{0m_1},
-     * a^{01}_{10}, \ldots, a^{01}_{1m_1},
+     * a^{01}_{00}, \ldots, a^{01}_{n_0 0}, b^{1}_{0}
+     * }^{\text{Neuron}^{1}_{0}},
+     * \overbrace{
+     * a^{01}_{01}, \ldots, a^{01}_{n_0 1}, b^{1}_{1}
+     * }^{\text{Neuron}^{1}_{1}},
+     * \ldots,
+     * \overbrace{
+     * a^{01}_{0 n_1}, \ldots, a^{01}_{n_0 n_1}, b^{1}_{n_1}
+     * }^{\text{Neuron}^{1}_{n_1}}
+     * }_{\text{Layer } 0 \rightarrow 1},
+     * \underbrace{
+     * a^{12}_{00}, \ldots, b^{2}_{n_2}
+     * }_{\text{Layer } 1 \rightarrow 2},
+     * \ldots,
+     * \underbrace{
+     * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     * }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
+     * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
+     * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
+     * @f$k@f$ in layer @f$i@f$.
+     *
+     * This ordering scheme is used internally to store weights in memory.
+     * Because all weights and biases connected to a neuron are aligned in a
+     * contiguous block this memory layout simplifies the implementation of
+     * node-decoupled Kalman filter training (NDEKF). However, for backward
+     * compatibility reasons this layout is NOT used for storing weights on
+     * disk (see setConnectionsAO()).
+     */
+    void                     setConnections(double const* const& connections);
+    /** Set neural network weights and biases (alternative ordering).
+     *
+     * @param[in] connections One-dimensional array with neural network
+     *                        connections in the following order:
+     * @f[
+     * \underbrace{
+     * \overbrace{
+     * a^{01}_{00}, \ldots, a^{01}_{0 n_1},
+     * a^{01}_{10}, \ldots, a^{01}_{1 n_1},
      * \ldots,
-     * a^{01}_{n_00}, \ldots, a^{01}_{n_0m_1},
+     * a^{01}_{n_0 0}, \ldots, a^{01}_{n_0 n_1},
      * }^{\text{Weights}}
      * \overbrace{
-     * b^{1}_{0}, \ldots, b^{1}_{m_1}
+     * b^{1}_{0}, \ldots, b^{1}_{n_1}
      * }^{\text{Biases}}
      * }_{\text{Layer } 0 \rightarrow 1},
      * \underbrace{
-     * a^{12}_{00}, \ldots, b^{2}_{m_2}
+     * a^{12}_{00}, \ldots, b^{2}_{n_2}
      * }_{\text{Layer } 1 \rightarrow 2},
      * \ldots,
      * \underbrace{
-     * a^{p-1,p}_{00}, \ldots, b^{p}_{m_p}
+     * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
      * }_{\text{Layer } p-1 \rightarrow p}
      * @f]
      * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
      * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
      * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
      * @f$k@f$ in layer @f$i@f$.
+     *
+     * This memory layout is used when storing weights on disk.
      */
-    void                     setConnections(double const* const& connections);
+    void                     setConnectionsAO(
+                                       double const* const& connections);
     /** Get neural network weights and biases.
      *
      * @param[out] connections One-dimensional array with neural network
      *                         connections (same order as described in
      *                         #setConnections())
      */
     void                     getConnections(double* connections) const;
+    /** Get neural network weights and biases (alternative ordering).
+     *
+     * @param[out] connections One-dimensional array with neural network
+     *                         connections (same order as described in
+     *                         #setConnectionsAO())
+     */
+    void                     getConnectionsAO(double* connections) const;
     /** Initialize connections with random numbers.
      *
      * @param[in] seed Random number generator seed.
@@ -211,16 +293,28 @@ class NeuralNetwork
                                 ModificationScheme modificationScheme,
                                 double             parameter1,
                                 double             parameter2);
-    /** Set neural network input layer node values.
+    /** Set neural network input layer neuron values.
      *
-     * @param[in] input Input layer node values.
+     * @param[in] input Input layer neuron values.
      */
     void                     setInput(double const* const& input) const;
-    /** Get neural network output layer node values.
+    /** Get neural network output layer neuron values.
      *
-     * @param[out] output Output layer node values.
+     * @param[out] output Output layer neuron values.
      */
     void                     getOutput(double* output) const;
+    /** Get neuron values (activations) for all layers.
+     *
+     * @return Vector of neuron values per layer.
+     */
+    std::vector<
+    std::vector<double>>     getNeuronsPerLayer() const;
+    /** Get gradients (derivatives of output w.r.t. weights) for all layers.
+     *
+     * @return Vector of gradients per layer.
+     */
+    std::vector<
+    std::vector<double>>     getGradientsPerLayer() const;
     /** Propagate input information through all layers.
      *
      * With the input data set by #setInput() this will calculate all remaining
@@ -295,10 +389,20 @@ class NeuralNetwork
     void                     calculateDFdc(double*              dFdc,
                                            double const* const& dGdxyz) const;
     /** Write connections to file.
+     *
+     * __CAUTION__: For compatibility reasons this format is NOT used for
+     * storing NN weights to disk!
      *
      * @param[in,out] file File stream to write to.
      */
     void                     writeConnections(std::ofstream& file) const;
+    /** Write connections to file (alternative ordering).
+     *
+     * This NN weights ordering layout is used when writing weights to disk.
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void                     writeConnectionsAO(std::ofstream& file) const;
     /** Return gathered neuron statistics.
      *
      * @param[out] count Number of neuron output value calculations.
@@ -333,6 +437,24 @@ class NeuralNetwork
      * Counters and summation variables for neuron statistics are reset.
      */
     void                     resetNeuronStatistics();
+    /** Get layer limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with layer limits as pair (begin, end),
+     *         (numLayers - 1) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getLayerLimits() const;
+    /** Get neuron limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with neuron limits as pair (begin, end),
+     *         (numNeurons - inputLayer->numNeurons) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getNeuronLimits() const;
     //void writeStatus(int, int);
     long                     getMemoryUsage();
     /** Print neural network architecture.
@@ -404,8 +526,13 @@ class NeuralNetwork
     int                      numHiddenLayers;
     /// Offset adress of weights per layer in combined weights+bias array.
     int*                     weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    /// Offset address of each neuron per layer in combined weights+bias array.
+    int**                    biasOffset;
+#else
     /// Offset adress of biases per layer in combined weights+bias array.
     int*                     biasOffset;
+#endif
     /// Offset adress of biases per layer in bias only array.
     int*                     biasOnlyOffset;
     /// Pointer to input layer.
@@ -480,6 +607,30 @@ class NeuralNetwork
     void propagateLayer(Layer& layer, Layer& layerPrev);
 };

+/** Convert string to activation function.
+ *
+ * @param[in] c Single character representing activation function.
+ *
+ * @return ActivationFunction corresponding to string.
+ */
+NeuralNetwork::ActivationFunction activationFromString(std::string c);
+/** Convert activation function to string.
+ *
+ * @param[in] af Input ActivationFunction type.
+ *
+ * @return Single char representing activation function.
+ */
+std::string stringFromActivation(NeuralNetwork::ActivationFunction af);
+
+/// Simple class for managing neural network settings.
+struct NeuralNetworkTopology
+{
+    int                                numLayers = 0;
+    std::vector<int>                   numNeuronsPerLayer;
+    std::vector<
+    NeuralNetwork::ActivationFunction> activationFunctionsPerLayer;
+};
+
 }

 #endif
diff --git a/src/libnnp/NeuralNetworx.cpp b/src/libnnp/NeuralNetworx.cpp
new file mode 100644
index 000000000..2c8d4e539
--- /dev/null
+++ b/src/libnnp/NeuralNetworx.cpp
@@ -0,0 +1,725 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
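+
+// Experimental Eigen-based reimplementation of the feed-forward network:
+// each Layer owns its weight matrix w, bias vector b and neuron state
+// vectors, so forward propagation reduces to x = w * y_prev + b followed
+// by an element-wise activation (see Layer::propagate() below).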
+ +#include "NeuralNetworx.h" +#include "utility.h" +#include // std::min, std::max +#include +#include + +#define EXP_LIMIT 35.0 + +using namespace std; +using namespace Eigen; +using namespace nnp; + +NeuralNetworx::NeuralNetworx(vector numNeuronsPerLayer, + vector activationPerLayer) : + numLayers (0), + numConnections (0), + numWeights (0), + numBiases (0) +{ + initialize(numNeuronsPerLayer, activationPerLayer); +} + +NeuralNetworx::NeuralNetworx(vector numNeuronsPerLayer, + vector activationStringPerLayer) : + numLayers (0), + numConnections (0), + numWeights (0), + numBiases (0) +{ + // Set irrelevant input layer activation to linear. + activationStringPerLayer.at(0) = "l"; + vector activationPerLayer; + for (auto a : activationStringPerLayer) + { + activationPerLayer.push_back(activationFromString(a)); + } + + initialize(numNeuronsPerLayer, activationPerLayer); +} + +NeuralNetworx::Layer::Layer(size_t numNeurons, + size_t numNeuronsPrevLayer, + Activation activation) : + numNeurons (numNeurons), + numNeuronsPrevLayer(numNeuronsPrevLayer), + fa (activation) +{ + // Input layer has no previous layer. + if (numNeuronsPrevLayer > 0) + { + w.resize(numNeurons, numNeuronsPrevLayer); + b.resize(numNeurons); + x.resize(numNeurons); + dydx.resize(numNeurons); + d2ydx2.resize(numNeurons); + } + y.resize(numNeurons); +} + +void NeuralNetworx::Layer::propagate(VectorXd const& yp) +{ + x = w * yp + b; + + if (fa == Activation::IDENTITY) + { + y = x; + dydx.setOnes(); + d2ydx2.setZero(); + } + else if (fa == Activation::TANH) + { + y = x.array().tanh(); + dydx = 1.0 - y.array() * y.array(); + d2ydx2 = -2.0 * y.array() * dydx.array(); + } + else if (fa == Activation::LOGISTIC) + { + y = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 1.0; + else if (xi < -EXP_LIMIT) return 0.0; + else return 1.0 / (1.0 + exp(-xi)); + }); + //dydx = (y.array() <= 0.0 || y.array() >= 1.0).select( + // 0.0, + // y.array() * (1.0 - y.array())); + dydx = y.array().unaryExpr( + [](double yi) + { + if (yi <= 0.0 || yi >= 1.0) return 0.0; + else return yi * (1.0 - yi); + }); + //d2ydx2 = (y.array() <= 0.0 || y.array() >= 1.0).select( + // 0.0, + // y.array() * (1.0 - y.array()) * (1.0 - 2.0 * y.array())); + d2ydx2 = y.array().unaryExpr( + [](double yi) + { + if (yi <= 0.0 || yi >= 1.0) return 0.0; + else return yi * (1.0 - yi) * (1.0 - 2.0 * yi); + ; + }); + } + else if (fa == Activation::SOFTPLUS) + { + y = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return xi; + else if (xi < -EXP_LIMIT) return 0.0; + else return log(1.0 + exp(xi)); + }); + dydx = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 1.0; + else if (xi < -EXP_LIMIT) return 0.0; + else return 1.0 / (1.0 + exp(-xi)); + }); + d2ydx2 = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 0.0; + else if (xi < -EXP_LIMIT) return 0.0; + else + { + double const tmp = 1.0 / (1.0 + exp(-xi)); + return tmp * (1.0 - tmp); + } + ; + }); + } + else if (fa == Activation::RELU) + { + //y = (x.array() > 0.0).select(x, 0.0); + y = x.array().unaryExpr( + [](double xi) + { + if (xi > 0.0) return xi; + else return 0.0; + }); + //dydx = (x.array() > 0.0).select(VectorXd::Ones(dydx.size()) 0.0); + dydx = x.array().unaryExpr( + [](double xi) + { + if (xi > 0.0) return 1.0; + else return 0.0; + }); + d2ydx2.setZero(); + } + else if (fa == Activation::GAUSSIAN) + { + y = (-0.5 * x.array() * x.array()).exp(); + dydx = -x.array() * y.array(); + d2ydx2 = (x.array() * x.array() - 1.0) * y.array(); + } + else 
+    else if (fa == Activation::COS)
+    {
+        y = x.array().cos();
+        dydx = -x.array().sin();
+        d2ydx2 = -y.array();
+    }
+    else if (fa == Activation::REVLOGISTIC)
+    {
+        VectorXd const tmp = 1.0 / (1.0 + (-x.array()).exp());
+        y = 1.0 - tmp.array();
+        dydx = tmp.array() * (tmp.array() - 1.0);
+        d2ydx2 = tmp.array() * (tmp.array() - 1.0) * (1.0 - 2.0 * tmp.array());
+    }
+    else if (fa == Activation::EXP)
+    {
+        y = (-x.array()).exp();
+        dydx = -y.array();
+        d2ydx2 = y.array();
+    }
+    else if (fa == Activation::HARMONIC)
+    {
+        y = x.array() * x.array();
+        dydx = 2.0 * x.array();
+        d2ydx2.setConstant(2.0);
+    }
+
+    return;
+}
+
+void NeuralNetworx::Layer::initializeDerivInput()
+{
+    // Multiply each row of weights (i.e. all weights connected to a neuron
+    // of this layer) with derivative of the activation function.
+    dydyi = dydx.asDiagonal() * w;
+
+    return;
+}
+
+void NeuralNetworx::Layer::propagateDerivInput(MatrixXd const& dypdyi)
+{
+    dydyi = dydx.asDiagonal() * (w * dypdyi);
+
+    return;
+}
+
+void NeuralNetworx::setInput(vector<double> const& input)
+{
+    if (input.size() != layers.at(0).numNeurons)
+    {
+        throw runtime_error("ERROR: Input vector size does not match number "
+                            "of input layer neurons.");
+    }
+    Map<VectorXd const> y0(input.data(), input.size());
+    layers.at(0).y = y0;
+
+    return;
+}
+
+void NeuralNetworx::propagate(bool deriv)
+{
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        layers.at(i).propagate(layers.at(i - 1).y);
+        if (deriv)
+        {
+            if (i == 1) layers.at(i).initializeDerivInput();
+            else layers.at(i).propagateDerivInput(layers.at(i - 1).dydyi);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::propagate(vector<double> const& input, bool deriv)
+{
+    setInput(input);
+    propagate(deriv);
+
+    return;
+}
+
+void NeuralNetworx::getOutput(vector<double>& output) const
+{
+    output.clear();
+    output.insert(output.begin(),
+                  layers.back().y.data(),
+                  layers.back().y.data() + layers.back().y.size());
+
+    return;
+}
+
+void NeuralNetworx::getDerivInput(vector<vector<double>>& derivInput) const
+{
+    derivInput.clear();
+    derivInput.resize(layers.back().dydyi.rows());
+    for (size_t i = 0; i < derivInput.size(); ++i)
+    {
+        for (size_t j = 0; j < layers.at(0).numNeurons; ++j)
+        {
+            derivInput.at(i).push_back(layers.back().dydyi(i, j));
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::setConnections(vector<double> const& connections)
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd const> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.cols();
+        for (Index i = 0; i < l->y.size(); ++i)
+        {
+            l->w.row(i) = c.segment(count, n);
+            count += n;
+            l->b(i) = c(count);
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::setConnectionsAO(vector<double> const& connections)
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd const> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.rows();
+        for (Index i = 0; i < l->w.cols(); ++i)
+        {
+            l->w.col(i) = c.segment(count, n);
+            count += n;
+        }
+        l->b = c.segment(count, l->b.size());
+        count += l->b.size();
+    }
+
+    return;
+}
+
+void NeuralNetworx::getConnections(vector<double>& connections) const
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::const_iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.cols();
+        for (Index i = 0; i < l->y.size(); ++i)
+        {
+            c.segment(count, n) = l->w.row(i);
+            count += n;
+            c(count) = l->b(i);
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::getConnectionsAO(vector<double>& connections) const
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::const_iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.rows();
+        for (Index i = 0; i < l->w.cols(); ++i)
+        {
+            c.segment(count, n) = l->w.col(i);
+            count += n;
+        }
+        c.segment(count, l->b.size()) = l->b;
+        count += l->b.size();
+    }
+
+    return;
+}
+
+void NeuralNetworx::writeConnections(std::ofstream& file) const
+{
+    // File header.
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    size_t count = 0;
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        Layer const& l = layers.at(i);
+        for (size_t j = 0; j < l.numNeurons; j++)
+        {
+            for (size_t k = 0; k < l.numNeuronsPrevLayer; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              l.w(j, k),
+                              count,
+                              i - 1,
+                              k + 1,
+                              i,
+                              j + 1);
+            }
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          l.b(j),
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::writeConnectionsAO(std::ofstream& file) const
+{
+    // File header.
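+    // (Identical header as in writeConnections(); only the ordering of the
+    // connection values below differs: per layer, all weights first, then
+    // all biases.)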
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    size_t count = 0;
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        Layer const& l = layers.at(i);
+        for (size_t j = 0; j < l.numNeuronsPrevLayer; j++)
+        {
+            for (size_t k = 0; k < l.numNeurons; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              l.w(k, j),
+                              count,
+                              i - 1,
+                              j + 1,
+                              i,
+                              k + 1);
+            }
+        }
+        for (size_t j = 0; j < l.numNeurons; j++)
+        {
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          l.b(j),
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+map<string, double> NeuralNetworx::getNeuronProperties(size_t layer,
+                                                       size_t neuron) const
+{
+    map<string, double> properties;
+
+    properties["y"] = layers.at(layer).y(neuron);
+    if (layers.at(layer).numNeuronsPrevLayer > 0)
+    {
+        properties["x"] = layers.at(layer).x(neuron);
+        properties["dydx"] = layers.at(layer).dydx(neuron);
+        properties["d2ydx2"] = layers.at(layer).d2ydx2(neuron);
+    }
+
+    return properties;
+}
+
+vector<pair<size_t, size_t>> NeuralNetworx::getLayerLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (size_t i = 0; i < numLayers - 2; ++i)
+    {
+        limits.push_back(make_pair(offsetLayer.at(i),
+                                   offsetLayer.at(i + 1) - 1));
+    }
+    limits.push_back(make_pair(offsetLayer.at(numLayers - 2),
+                               numConnections - 1));
+
+    return limits;
+}
+
+vector<pair<size_t, size_t>> NeuralNetworx::getNeuronLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        size_t const n = layers.at(i + 1).numNeurons;
+        for (size_t j = 0; j < n - 1; ++j)
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(j),
+                                       offsetNeuron.at(i).at(j + 1) - 1));
+        }
+        if (i == numLayers - 2)
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(n - 1),
+                                       numConnections - 1));
+        }
+        else
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(n - 1),
+                                       offsetNeuron.at(i + 1).at(0) - 1));
+        }
+    }
+
+    return limits;
+}
+
+vector<string> NeuralNetworx::info() const
+{
+    vector<string> v;
+    Eigen::Index maxNeurons = 0;
+
+    v.push_back(strpr("Number of layers     : %6zu\n", numLayers));
+    v.push_back(strpr("Number of neurons    : %6zu\n", numNeurons));
+    v.push_back(strpr("Number of connections: %6zu\n", numConnections));
+    v.push_back(strpr("Number of weights    : %6zu\n", numWeights));
+    v.push_back(strpr("Number of biases     : %6zu\n", numBiases));
+    v.push_back(strpr("Architecture    "));
+    for (size_t i = 0; i < layers.size(); ++i)
+    {
+        maxNeurons = max(layers.at(i).y.size(),
+                         maxNeurons);
+    }
+    v.push_back("\n");
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
+
+    for (size_t i = 0; i < static_cast<size_t>(maxNeurons); ++i)
+    {
+
+vector<string> NeuralNetworx::info() const
+{
+    vector<string> v;
+    Eigen::Index maxNeurons = 0;
+
+    v.push_back(strpr("Number of layers     : %6zu\n", numLayers));
+    v.push_back(strpr("Number of neurons    : %6zu\n", numNeurons));
+    v.push_back(strpr("Number of connections: %6zu\n", numConnections));
+    v.push_back(strpr("Number of weights    : %6zu\n", numWeights));
+    v.push_back(strpr("Number of biases     : %6zu\n", numBiases));
+    v.push_back(strpr("Architecture    "));
+    for (size_t i = 0; i < layers.size(); ++i)
+    {
+        v.push_back(strpr(" %3zu", layers.at(i).numNeurons));
+        maxNeurons = max(layers.at(i).y.size(),
+                         maxNeurons);
+    }
+    v.push_back("\n");
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
+
+    for (size_t i = 0; i < static_cast<size_t>(maxNeurons); ++i)
+    {
+        v.push_back(strpr("%4d", i + 1));
+        string s = "";
+        for (size_t j = 0; j < layers.size(); ++j)
+        {
+            if (i < static_cast<size_t>(layers.at(j).y.size()))
+            {
+                if (j == 0)
+                {
+                    s += strpr(" %3s", "G");
+                }
+                else
+                {
+                    s += strpr(" %3s",
+                               stringFromActivation(layers.at(j).fa).c_str());
+                }
+            }
+            else
+            {
+                s += "    ";
+            }
+        }
+        v.push_back(s += "\n");
+    }
+
+    return v;
+}
+
+void NeuralNetworx::initialize(vector<size_t>     numNeuronsPerLayer,
+                               vector<Activation> activationPerLayer)
+{
+    if (numNeuronsPerLayer.size() < 2)
+    {
+        throw runtime_error(strpr("ERROR: Neural network must have at least "
+                                  "two layers (input = %zu).\n",
+                                  numNeuronsPerLayer.size()));
+    }
+    if (numNeuronsPerLayer.size() != activationPerLayer.size())
+    {
+        throw runtime_error("ERROR: Number of layers is inconsistent.\n");
+    }
+
+    // Input layer has no neurons from previous layer.
+    numNeuronsPerLayer.insert(numNeuronsPerLayer.begin(), 0);
+    numLayers = 0;
+    layers.clear();
+    for (size_t i = 0; i < activationPerLayer.size(); ++i)
+    {
+        if (numNeuronsPerLayer.at(i + 1) < 1)
+        {
+            throw runtime_error("ERROR: Layer needs at least one neuron.\n");
+        }
+        layers.push_back(Layer(numNeuronsPerLayer.at(i + 1),
+                               numNeuronsPerLayer.at(i),
+                               activationPerLayer.at(i)));
+        numLayers++;
+    }
+
+    // Compute number of connections, weights and biases.
+    numNeurons = 0;
+    numConnections = 0;
+    numWeights = 0;
+    numBiases = 0;
+    for (auto const& l : layers)
+    {
+        numNeurons += l.y.size();
+        numConnections += l.w.size() + l.b.size();
+        numWeights += l.w.size();
+        numBiases += l.b.size();
+    }
+
+    // Compute layer and neuron offsets for combined connections vector.
+    offsetLayer.clear();
+    offsetLayer.reserve(numLayers - 1);
+    offsetNeuron.clear();
+    offsetNeuron.resize(numLayers - 1);
+    size_t count = 0;
+    for (size_t i = 1; i < numLayers; ++i)
+    {
+        offsetLayer.push_back(count);
+        for (size_t j = 0; j < layers.at(i).numNeurons; ++j)
+        {
+            offsetNeuron.at(i - 1).push_back(count);
+            // Weights of neuron j.
+            count += layers.at(i).numNeuronsPrevLayer;
+            // Bias of neuron j.
+            count++;
+        }
+    }
+
+    return;
+}
+
+string nnp::stringFromActivation(NeuralNetworx::Activation a)
+{
+    string c = "";
+
+    if (a == NeuralNetworx::Activation::IDENTITY) c = "l";
+    else if (a == NeuralNetworx::Activation::TANH) c = "t";
+    else if (a == NeuralNetworx::Activation::LOGISTIC) c = "s";
+    else if (a == NeuralNetworx::Activation::SOFTPLUS) c = "p";
+    else if (a == NeuralNetworx::Activation::RELU) c = "r";
+    else if (a == NeuralNetworx::Activation::GAUSSIAN) c = "g";
+    else if (a == NeuralNetworx::Activation::COS) c = "c";
+    else if (a == NeuralNetworx::Activation::REVLOGISTIC) c = "S";
+    else if (a == NeuralNetworx::Activation::EXP) c = "e";
+    else if (a == NeuralNetworx::Activation::HARMONIC) c = "h";
+    else
+    {
+        throw runtime_error("ERROR: Unknown activation function.\n");
+    }
+
+    return c;
+}
+
+NeuralNetworx::Activation nnp::activationFromString(string c)
+{
+    NeuralNetworx::Activation a;
+
+    if (c == "l") a = NeuralNetworx::Activation::IDENTITY;
+    else if (c == "t") a = NeuralNetworx::Activation::TANH;
+    else if (c == "s") a = NeuralNetworx::Activation::LOGISTIC;
+    else if (c == "p") a = NeuralNetworx::Activation::SOFTPLUS;
+    else if (c == "r") a = NeuralNetworx::Activation::RELU;
+    else if (c == "g") a = NeuralNetworx::Activation::GAUSSIAN;
+    else if (c == "c") a = NeuralNetworx::Activation::COS;
+    else if (c == "S") a = NeuralNetworx::Activation::REVLOGISTIC;
+    else if (c == "e") a = NeuralNetworx::Activation::EXP;
+    else if (c == "h") a = NeuralNetworx::Activation::HARMONIC;
+    else
+    {
+        throw runtime_error("ERROR: Unknown activation function.\n");
+    }
+
+    return a;
+}
diff --git a/src/libnnp/NeuralNetworx.h b/src/libnnp/NeuralNetworx.h
new file mode 100644
index 000000000..bde12ec9f
--- /dev/null
+++ b/src/libnnp/NeuralNetworx.h
@@ -0,0 +1,380 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef NEURALNETWORX_H
+#define NEURALNETWORX_H
+
+#include <cstddef> // std::size_t
+#include <fstream> // std::ofstream
+#include <map>     // std::map
+#include <string>  // std::string
+#include <utility> // std::pair
+#include <vector>  // std::vector
+#include <Eigen/Core>
+
+namespace nnp
+{
+
+/// This class implements a feed-forward neural network with Eigen.
+class NeuralNetworx
+{
+public:
+
+    /// List of available activation function types.
+    enum class Activation
+    {
+        /// @f$f_a(x) = x@f$
+        IDENTITY,
+        /// @f$f_a(x) = \tanh(x)@f$
+        TANH,
+        /// @f$f_a(x) = 1 / (1 + \mathrm{e}^{-x})@f$
+        LOGISTIC,
+        /// @f$f_a(x) = \ln (1 + \mathrm{e}^x)@f$
+        SOFTPLUS,
+        /// @f$f_a(x) = \max(0, x)@f$ (NOT recommended for HDNNPs!)
+        RELU,
+        /// @f$f_a(x) = \mathrm{e}^{-x^2 / 2}@f$
+        GAUSSIAN,
+        /// @f$f_a(x) = \cos (x)@f$
+        COS,
+        /// @f$f_a(x) = 1 - 1 / (1 + \mathrm{e}^{-x})@f$
+        REVLOGISTIC,
+        /// @f$f_a(x) = \mathrm{e}^{-x}@f$
+        EXP,
+        /// @f$f_a(x) = x^2@f$
+        HARMONIC
+    };
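+
+    // These activation types correspond one-to-one to the single-letter
+    // codes "l", "t", "s", "p", "r", "g", "c", "S", "e", "h" understood by
+    // activationFromString() and stringFromActivation() below.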
+
+    /// One layer of a feed-forward neural network.
+    struct Layer
+    {
+        /** Constructor for a single layer.
+         *
+         * @param[in] numNeurons Number of neurons in this layer.
+         * @param[in] numNeuronsPrevLayer Number of neurons in previous layer.
+         * @param[in] activation Activation function used in this layer.
+         */
+        Layer(std::size_t numNeurons,
+              std::size_t numNeuronsPrevLayer,
+              Activation  activation);
+        /** Propagate information through this layer.
+         *
+         * @param[in] yp Neuron values from previous layer.
+         */
+        void propagate(Eigen::VectorXd const& yp);
+        /** Initialize #dydyi in this layer.
+         */
+        void initializeDerivInput();
+        /** Propagate derivative information through this layer.
+         *
+         * @param[in] dypdyi Derivatives from previous layer.
+         */
+        void propagateDerivInput(Eigen::MatrixXd const& dypdyi);
+
+        /// Number of neurons in this layer.
+        std::size_t     numNeurons;
+        /// Number of neurons in previous layer.
+        std::size_t     numNeuronsPrevLayer;
+        /// Common activation function for all neurons in this layer.
+        Activation      fa;
+        /// Weights from previous to this layer.
+        Eigen::MatrixXd w;
+        /// Biases assigned to neurons in this layer.
+        Eigen::VectorXd b;
+        /// Neuron weighted sum before activation function.
+        Eigen::VectorXd x;
+        /// Neuron values (activation function applied).
+        Eigen::VectorXd y;
+        /// Neuron value derivative w.r.t. activation function argument.
+        Eigen::VectorXd dydx;
+        /// Neuron value second derivative w.r.t. activation function argument.
+        Eigen::VectorXd d2ydx2;
+        /// Derivative of neurons w.r.t. neurons in input layer.
+        Eigen::MatrixXd dydyi;
+    };
+
+    /** Neural network class constructor.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationPerLayer Array with activation function type per
+     *                               layer (note: input layer activation
+     *                               function is mandatory although it is
+     *                               never used).
+     */
+    NeuralNetworx(std::vector<std::size_t> numNeuronsPerLayer,
+                  std::vector<Activation>  activationPerLayer);
+    /** Neural network class constructor with strings for activation types.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationStringPerLayer Array with activation function
+     *                                     type per layer (note: input layer
+     *                                     activation function is mandatory
+     *                                     although it is never used).
+     */
+    NeuralNetworx(std::vector<std::size_t> numNeuronsPerLayer,
+                  std::vector<std::string> activationStringPerLayer);
+    /** Set input layer neuron values.
+     *
+     * @param[in] input Vector with input neuron values.
+     */
+    void setInput(std::vector<double> const& input);
+    /** Propagate information through all layers (input already set).
+     *
+     * @param[in] deriv Propagate also derivative of output w.r.t. input
+     *                  neurons.
+     */
+    void propagate(bool deriv = false);
+    /** Propagate information through all layers.
+     *
+     * @param[in] input Vector with input neuron values.
+     * @param[in] deriv Propagate also derivative of output w.r.t. input
+     *                  neurons.
+     */
+    void propagate(std::vector<double> const& input,
+                   bool                       deriv = false);
+    /** Get output layer neuron values.
+     *
+     * @param[out] output Vector with output layer neuron values.
+     */
+    void getOutput(std::vector<double>& output) const;
+    /** Get derivative of output layer neurons w.r.t. input neurons.
+     *
+     * @param[out] derivInput Derivatives of output layer neurons with
+     *                        respect to input layer neurons.
+     */
+    void getDerivInput(std::vector<std::vector<double>>& derivInput) const;
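+    // Usage sketch (hypothetical values): construct a 2-3-1 network and run
+    // one forward pass including derivatives w.r.t. the input neurons:
+    //     NeuralNetworx nn({2, 3, 1},
+    //                      std::vector<std::string>{"l", "t", "l"});
+    //     nn.propagate(std::vector<double>{0.1, -0.2}, true);
+    //     std::vector<double> output;
+    //     nn.getOutput(output);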
+    /** Set neural network weights and biases.
+     *
+     * @param[in] connections One-dimensional vector with neural network
+     *                        connections in the following order:
+     * @f[
+     *   \underbrace{
+     *     \overbrace{
+     *       a^{01}_{00}, \ldots, a^{01}_{n_00}, b^{1}_{0}
+     *     }^{\text{Neuron}^{1}_{0}}
+     *     \overbrace{
+     *       a^{01}_{01}, \ldots, a^{01}_{n_01}, b^{1}_{1}
+     *     }^{\text{Neuron}^{1}_{1}}
+     *     \ldots,
+     *     \overbrace{
+     *       a^{01}_{0n_1}, \ldots, a^{01}_{n_0n_1}, b^{1}_{n_1}
+     *     }^{\text{Neuron}^{1}_{n_1}}
+     *   }_{\text{Layer } 0 \rightarrow 1},
+     *   \underbrace{
+     *     a^{12}_{00}, \ldots, b^{2}_{n_2}
+     *   }_{\text{Layer } 1 \rightarrow 2},
+     *   \ldots,
+     *   \underbrace{
+     *     a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     *   }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron @f$j@f$ in
+     * layer @f$i-1@f$ to neuron @f$k@f$ in layer @f$i@f$ and @f$b^{i}_{k}@f$
+     * is the bias assigned to neuron @f$k@f$ in layer @f$i@f$.
+     *
+     * This ordering scheme is used internally to store weights in memory.
+     * Because all weights and biases connected to a neuron are aligned in a
+     * contiguous block this memory layout simplifies the implementation of
+     * node-decoupled Kalman filter training (NDEKF). However, for backward
+     * compatibility reasons this layout is NOT used for storing weights on
+     * disk (see setConnectionsAO()).
+     */
+    void setConnections(std::vector<double> const& connections);
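+    // Ordering example (hypothetical 2-2-1 network): setConnections()
+    // expects
+    //     { a01_00, a01_10, b1_0,   a01_01, a01_11, b1_1,
+    //       a12_00, a12_10, b2_0 },
+    // i.e. each neuron's incoming weights immediately followed by its own
+    // bias, neuron by neuron, layer by layer.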
+    /** Set neural network weights and biases (alternative ordering).
+     *
+     * @param[in] connections One-dimensional vector with neural network
+     *                        connections in the following order:
+     * @f[
+     *   \underbrace{
+     *     \overbrace{
+     *       a^{01}_{00}, \ldots, a^{01}_{0n_1},
+     *       a^{01}_{10}, \ldots, a^{01}_{1n_1},
+     *       \ldots,
+     *       a^{01}_{n_00}, \ldots, a^{01}_{n_0n_1},
+     *     }^{\text{Weights}}
+     *     \overbrace{
+     *       b^{1}_{0}, \ldots, b^{1}_{n_1}
+     *     }^{\text{Biases}}
+     *   }_{\text{Layer } 0 \rightarrow 1},
+     *   \underbrace{
+     *     a^{12}_{00}, \ldots, b^{2}_{n_2}
+     *   }_{\text{Layer } 1 \rightarrow 2},
+     *   \ldots,
+     *   \underbrace{
+     *     a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     *   }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron @f$j@f$ in
+     * layer @f$i-1@f$ to neuron @f$k@f$ in layer @f$i@f$ and @f$b^{i}_{k}@f$
+     * is the bias assigned to neuron @f$k@f$ in layer @f$i@f$.
+     *
+     * This memory layout is used when storing weights on disk.
+     */
+    void setConnectionsAO(std::vector<double> const& connections);
+    /** Get neural network weights and biases.
+     *
+     * @param[out] connections One-dimensional vector with neural network
+     *                         connections (same order as described in
+     *                         #setConnections()).
+     */
+    void getConnections(std::vector<double>& connections) const;
+    /** Get neural network weights and biases (alternative ordering).
+     *
+     * @param[out] connections One-dimensional vector with neural network
+     *                         connections (same order as described in
+     *                         #setConnectionsAO()).
+     */
+    void getConnectionsAO(std::vector<double>& connections) const;
+    /** Write connections to file.
+     *
+     * __CAUTION__: For compatibility reasons this format is NOT used for
+     * storing NN weights to disk!
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void writeConnections(std::ofstream& file) const;
+    /** Write connections to file (alternative ordering).
+     *
+     * This NN weights ordering layout is used when writing weights to disk.
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void writeConnectionsAO(std::ofstream& file) const;
+    /** Get properties for a single neuron.
+     *
+     * @param[in] layer Index of layer (starting from 0 = input layer).
+     * @param[in] neuron Index of neuron (starting from 0 = first neuron in
+     *                   layer).
+     *
+     * @return Map from neuron's property (as string) to value.
+     */
+    std::map<
+    std::string, double>       getNeuronProperties(std::size_t layer,
+                                                   std::size_t neuron) const;
+    /** Get layer limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with layer limits as pairs (begin, end),
+     *         (numLayers - 1) entries.
+     */
+    std::vector<std::pair<
+    std::size_t, std::size_t>> getLayerLimits() const;
+    /** Get neuron limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with neuron limits as pairs (begin, end),
+     *         (numNeurons - numNeurons[input layer]) entries.
+     */
+    std::vector<std::pair<
+    std::size_t, std::size_t>> getNeuronLimits() const;
+    /** Printable strings with neural network architecture information.
+     *
+     * @return Vector of lines for printing.
+     */
+    std::vector<std::string>   info() const;
+    /// Getter for #numLayers
+    std::size_t getNumLayers() const;
+    /// Getter for #numNeurons
+    std::size_t getNumNeurons() const;
+    /// Getter for #numConnections
+    std::size_t getNumConnections() const;
+    /// Getter for #numWeights
+    std::size_t getNumWeights() const;
+    /// Getter for #numBiases
+    std::size_t getNumBiases() const;
+
+private:
+
+    /** Class initialization.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationPerLayer Array with activation function type per
+     *                               layer (note: input layer activation
+     *                               function is mandatory although it is
+     *                               never used).
+     *
+     * Avoids duplicate code due to overloaded constructor.
+     */
+    void initialize(std::vector<std::size_t> numNeuronsPerLayer,
+                    std::vector<Activation>  activationPerLayer);
+
+    /// Number of neural network layers.
+    std::size_t numLayers;
+    /// Number of neurons.
+    std::size_t numNeurons;
+    /// Number of neural network connections (weights + biases).
+    std::size_t numConnections;
+    /// Number of neural network weights.
+    std::size_t numWeights;
+    /// Number of neural network biases.
+    std::size_t numBiases;
+    /// Offset of layers in combined connection vector.
+    std::vector<
+    std::size_t> offsetLayer;
+    /// Offset of neurons in combined connection vector.
+    std::vector<
+    std::vector<
+    std::size_t>> offsetNeuron;
+    /// Vector of neural network layers.
+    std::vector<Layer> layers;
+
+};
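+
+// Note: getLayerLimits() and getNeuronLimits() return exactly the
+// (begin, end) index pairs that KalmanFilter::setupDecoupling() consumes for
+// per-layer (DT_LAYER) and per-node (DT_NODE/NDEKF) decoupling, see
+// Training::setupTraining().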
+
+/** Convert string to activation function.
+ *
+ * @param[in] c String representing activation function.
+ *
+ * @return Activation corresponding to string.
+ */
+NeuralNetworx::Activation activationFromString(std::string c);
+/** Convert activation function to string.
+ *
+ * @param[in] a Input Activation type.
+ *
+ * @return String representing activation function.
+ */
+std::string stringFromActivation(NeuralNetworx::Activation a);
+
+//////////////////////////////////
+// Inlined function definitions //
+//////////////////////////////////
+
+inline
+std::size_t NeuralNetworx::getNumLayers() const {return numLayers;}
+
+inline
+std::size_t NeuralNetworx::getNumNeurons() const {return numNeurons;}
+
+inline
+std::size_t NeuralNetworx::getNumConnections() const {return numConnections;}
+
+inline
+std::size_t NeuralNetworx::getNumWeights() const {return numWeights;}
+
+inline
+std::size_t NeuralNetworx::getNumBiases() const {return numBiases;}
+
+}
+
+#endif
diff --git a/src/libnnp/Settings.cpp b/src/libnnp/Settings.cpp
index 68c0638d8..7f6a2c2ca 100644
--- a/src/libnnp/Settings.cpp
+++ b/src/libnnp/Settings.cpp
@@ -73,6 +73,7 @@ map<string, string> const createKnownKeywordsMap()
     m["jacobian_mode"            ] = "";
     m["update_strategy"          ] = "";
     m["selection_mode"           ] = "";
+    m["decoupling_type"          ] = "";
     m["task_batch_size_energy"   ] = "";
     m["task_batch_size_force"    ] = "";
     m["gradient_type"            ] = "";
diff --git a/src/libnnp/makefile b/src/libnnp/makefile
index ba03f653e..c6354e077 100644
--- a/src/libnnp/makefile
+++ b/src/libnnp/makefile
@@ -45,7 +45,7 @@ AR=$(PROJECT_AR)
 ARFLAGS=$(PROJECT_ARFLAGS)
 
 # Extra include paths for compiling.
-INCLUDES=-I./
+INCLUDES=-I./ -I${PROJECT_EIGEN}
 
 ###############################################################################
diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp
index ab55a8419..5d02b44c2 100644
--- a/src/libnnptrain/KalmanFilter.cpp
+++ b/src/libnnptrain/KalmanFilter.cpp
@@ -16,9 +16,11 @@
 
 #include "KalmanFilter.h"
 #include "utility.h"
+#include "mpi-extra.h"
 #include <cmath>
 #include <cstddef>
 #include <Eigen/LU>
+#include <map>     // std::map
 
 using namespace Eigen;
 using namespace std;
@@ -27,8 +29,11 @@ using namespace nnp;
 KalmanFilter::KalmanFilter(size_t const sizeState,
                            KalmanType const type) :
     Updater(sizeState),
+    myRank         (0   ),
+    numProcs       (0   ),
     sizeObservation(0   ),
     numUpdates     (0   ),
+    sizeP          (0   ),
     epsilon        (0.0 ),
     q              (0.0 ),
     q0             (0.0 ),
@@ -62,17 +67,130 @@ KalmanFilter::KalmanFilter(size_t const sizeState,
     w = new Map<VectorXd>(0, sizeState);
     xi = new Map<VectorXd const>(0, sizeObservation);
     H = new Map<MatrixXd const>(0, sizeState, sizeObservation);
-    P.resize(sizeState, sizeState);
-    P.setIdentity();
-    // Prevent problems with unallocated K when log starts.
-    K.resize(sizeState, sizeObservation);
-    K.setZero();
 }
 
 KalmanFilter::~KalmanFilter()
 {
 }
 
+void KalmanFilter::setupMPI(MPI_Comm* communicator)
+{
+    MPI_Comm_dup(*communicator, &comm);
+    MPI_Comm_rank(comm, &myRank);
+    MPI_Comm_size(comm, &numProcs);
+
+    return;
+}
+
+void KalmanFilter::setupDecoupling(vector<pair<size_t, size_t>> groupLimits)
+{
+    this->groupLimits = groupLimits;
+
+    // Check consistency of decoupling group limits.
+    if (groupLimits.at(0).first != 0)
+    {
+        auto const& l = groupLimits.at(0);
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, first group must start with index "
+                                  "0 (is %zu).\n",
+                                  l.first));
+    }
+    for (size_t i = 0; i < groupLimits.size(); ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        if (l.first > l.second)
+        {
+            throw runtime_error(
+                      strpr("ERROR: Inconsistent decoupling group limits, "
+                            "group %zu: start %zu > end %zu.\n",
+                            i, l.first, l.second));
+        }
+    }
+    for (size_t i = 0; i < groupLimits.size() - 1; ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        auto const& r = groupLimits.at(i + 1);
+        if (l.second + 1 != r.first)
+        {
+            throw runtime_error(
+                      strpr("ERROR: Inconsistent decoupling group limits, "
+                            "group %zu end %zu + 1 != group %zu start %zu.\n",
+                            i, l.second, i + 1, r.first));
+        }
+    }
+    if (groupLimits.back().second != sizeState - 1)
+    {
+        auto const& l = groupLimits.back();
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, last group must end with index "
+                                  "%zu (is %zu).\n",
+                                  sizeState - 1,
+                                  l.second));
+    }
+
+    // Distribute groups to MPI procs.
+    numGroupsPerProc.resize(numProcs, 0);
+    groupOffsetPerProc.resize(numProcs, 0);
+    int quotient = groupLimits.size() / numProcs;
+    int remainder = groupLimits.size() % numProcs;
+    int groupSum = 0;
+    for (int i = 0; i < numProcs; ++i)
+    {
+        numGroupsPerProc.at(i) = quotient;
+        if (remainder > 0 && i < remainder)
+        {
+            numGroupsPerProc.at(i)++;
+        }
+        groupOffsetPerProc.at(i) = groupSum;
+        groupSum += numGroupsPerProc.at(i);
+    }
+
+    // Compute state vector segment length and offset for each processor.
+    for (int i = 0; i < numProcs - 1; ++i)
+    {
+        sizeStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i + 1)).first -
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+        offsetStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+    }
+    sizeStatePerProc.push_back(
+        groupLimits.back().second + 1 -
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+    offsetStatePerProc.push_back(
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+
+    // Store group index and segment length for this processor.
+    for (int i = 0; i < numGroupsPerProc.at(myRank); ++i)
+    {
+        size_t index = groupOffsetPerProc.at(myRank) + i;
+        size_t size = groupLimits.at(index).second
+                    - groupLimits.at(index).first + 1;
+        myGroups.push_back(make_pair(index, size));
+    }
+
+    // Allocate per-processor matrices for all groups.
+    for (auto g : myGroups)
+    {
+        P.push_back(Eigen::MatrixXd(g.second, g.second));
+        P.back().setIdentity();
+
+        X.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+
+        // Prevent problems with unallocated K when log starts.
+        K.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+        K.back().setZero();
+
+        sizeP += g.second * g.second;
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &sizeP, 1, MPI_SIZE_T, MPI_SUM, comm);
+
+    return;
+}
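+
+// Distribution sketch (hypothetical numbers): 10 decoupling groups on 4 MPI
+// tasks yield numGroupsPerProc = {3, 3, 2, 2}; every task allocates only the
+// covariance blocks P of its own groups, so the total memory sizeP is the
+// sum of squared group sizes instead of sizeState * sizeState.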
+
 void KalmanFilter::setSizeObservation(size_t const size)
 {
     sizeObservation = size;
@@ -125,15 +243,32 @@ void KalmanFilter::update()
 
 void KalmanFilter::update(size_t const sizeObservation)
 {
-    X.resize(sizeState, sizeObservation);
+    MatrixXd A(sizeObservation, sizeObservation);
+    A.setZero();
 
-    // Calculate temporary result.
-    // X = P . H
-    X = P.selfadjointView<Lower>() * (*H);
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+
+        x.resize(n, sizeObservation);
+
+        // Calculate temporary result.
+        // X = P . H
+        x = p.selfadjointView<Lower>() * H->block(s, 0, n, sizeObservation);
+
+        // Calculate scaling matrix.
+        // A = H^T . X
+        // For decoupling, accumulate the scaling matrix locally here.
+        A += H->block(s, 0, n, sizeObservation).transpose() * x;
+    }
 
-    // Calculate scaling matrix.
-    // A = H^T . X
-    MatrixXd A = H->transpose() * X;
+    // Sum up scaling matrix globally.
+    MPI_Allreduce(MPI_IN_PLACE, A.data(), A.size(), MPI_DOUBLE, MPI_SUM, comm);
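+
+    // After the reduction every task holds the same scaling matrix
+    // A = sum_g H_g^T P_g H_g; compared to the GEKF, the full covariance
+    // matrix is thus approximated by its block-diagonal (per-group) part.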
 
     // Increase learning rate.
     // eta(n) = eta(0) * exp(n * tau)
@@ -150,38 +285,52 @@ void KalmanFilter::update(size_t const sizeObservation)
         A.diagonal() += VectorXd::Constant(sizeObservation, lambda);
     }
 
-    // Calculate Kalman gain matrix.
-    // K = X . A^-1
-    K.resize(sizeState, sizeObservation);
-    K = X * A.inverse();
-
-    // Update error covariance matrix.
-    // P = P - K . X^T
-    P.noalias() -= K * X.transpose();
-
-    // Apply forgetting factor.
-    if (type == KT_FADINGMEMORY)
+    for (size_t i = 0; i < myGroups.size(); ++i)
     {
-        P *= 1.0 / lambda;
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+        auto& k = K.at(i);
+
+        // Calculate Kalman gain matrix.
+        // K = X . A^-1
+        k.resize(n, sizeObservation);
+        k = x * A.inverse();
+
+        // Update error covariance matrix.
+        // P = P - K . X^T
+        p.noalias() -= k * x.transpose();
+
+        // Apply forgetting factor.
+        if (type == KT_FADINGMEMORY)
+        {
+            p *= 1.0 / lambda;
+        }
+        // Add process noise.
+        // P = P + Q
+        p.diagonal() += VectorXd::Constant(n, q);
+
+        // Update state vector.
+        // w = w + K . xi
+        w->segment(s, n) += k * (*xi);
     }
-    // Add process noise.
-    // P = P + Q
-    P.diagonal() += VectorXd::Constant(sizeState, q);
-
-    // Update state vector.
-    // w = w + K . xi
-    (*w) += K * (*xi);
 
     // Anneal process noise.
     // q(n) = q(0) * exp(-n * tau)
     if (q > qmin) q *= exp(-qtau);
 
     // Update forgetting factor.
     if (type == KT_FADINGMEMORY)
     {
         lambda = nu * lambda + 1.0 - nu;
         gamma = 1.0 / (1.0 + lambda / gamma);
     }
+
+    // Communicate new weight segments.
+    MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, w->data(),
+                   sizeStatePerProc.data(), offsetStatePerProc.data(),
+                   MPI_DOUBLE, comm);
 
     numUpdates++;
 
@@ -206,7 +355,10 @@ void KalmanFilter::setParametersStandard(double const epsilon,
     q   = q0;
     eta = eta0;
 
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
 
     return;
 }
@@ -226,7 +378,10 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon,
     this->nu   = nu;
 
     q = q0;
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
     gamma = 1.0;
 
     return;
@@ -235,12 +390,30 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon,
 
 string KalmanFilter::status(size_t epoch) const
 {
-    double Pasym    = 0.5 * (P - P.transpose()).array().abs().mean();
-    double Pdiag    = P.diagonal().array().abs().sum();
-    double Poffdiag = (P.array().abs().sum() - Pdiag)
-                    / (sizeState * (sizeState - 1));
+    double Pasym    = 0.0;
+    double Pdiag    = 0.0;
+    double Poffdiag = 0.0;
+    double Kmean    = 0.0;
+
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        auto const& p = P.at(i);
+        auto const& k = K.at(i);
+        Pasym += 0.5 * (p - p.transpose()).array().abs().sum();
+        double pdiag = p.diagonal().array().abs().sum();
+        Pdiag += pdiag;
+        Poffdiag += p.array().abs().sum() - pdiag;
+        Kmean += k.array().abs().sum();
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &Pasym   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Pdiag   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Poffdiag, 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Kmean   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    Pasym    /= (sizeP - sizeState);
     Pdiag    /= sizeState;
-    double Kmean    = K.array().abs().mean();
+    Poffdiag /= (sizeP - sizeState);
+    Kmean    /= (sizeState * sizeObservation);
 
     string s = strpr("%10zu %10zu %16.8E %16.8E %16.8E %16.8E %16.8E",
                      epoch, numUpdates, Pdiag, Poffdiag, Pasym, Kmean, q);
@@ -314,11 +487,24 @@ vector<string> KalmanFilter::info() const
 {
     vector<string> v;
 
+    v.push_back("Extended Kalman Filter (EKF) optimizer:\n");
+    v.push_back(strpr("sizeState       = %zu\n", sizeState));
+    v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
+    v.push_back(strpr("myRank          = %d\n", myRank));
+    v.push_back(strpr("numProcs        = %d\n", numProcs));
+    v.push_back(strpr("OpenMP threads used                     : %d\n",
+                      nbThreads()));
+    v.push_back(strpr("Number of decoupling groups             : %zu\n",
+                      groupLimits.size()));
+    v.push_back(strpr("Number of decoupling groups of proc %3d : %zu\n",
+                      myRank, myGroups.size()));
+    v.push_back(strpr("P matrix size ratio (compared to GEKF)  : %6.2f %%\n",
+                      100.0 * sizeP / (sizeState * sizeState)));
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
     if (type == KT_STANDARD)
     {
         v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon = %12.4E\n", epsilon));
         v.push_back(strpr("q0      = %12.4E\n", q0     ));
         v.push_back(strpr("qtau    = %12.4E\n", qtau   ));
@@ -330,8 +516,6 @@ vector<string> KalmanFilter::info() const
     else if (type == KT_FADINGMEMORY)
     {
         v.push_back(strpr("KalmanType::KT_FADINGMEMORY (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon = %12.4E\n", epsilon));
        v.push_back(strpr("q0      = %12.4E\n", q0     ));
         v.push_back(strpr("qtau    = %12.4E\n", qtau   ));
@@ -339,7 +523,17 @@ vector<string> KalmanFilter::info() const
         v.push_back(strpr("lambda  = %12.4E\n", lambda));
         v.push_back(strpr("nu      = %12.4E\n", nu    ));
     }
-    v.push_back(strpr("OpenMP threads used: %d\n", nbThreads()));
+    //for (size_t i = 0; i < groupLimits.size(); ++i)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n",
+    //                      i,
+    //                      groupLimits.at(i).second
+    //                      - groupLimits.at(i).first + 1));
+    //}
+    //for (auto g : myGroups)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second));
+    //}
 
     return v;
 }
diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h
index 356cee949..0d409ac1c 100644
--- a/src/libnnptrain/KalmanFilter.h
+++ b/src/libnnptrain/KalmanFilter.h
@@ -22,6 +22,7 @@
 #include <cstddef>
 #include <string>
 #include <vector>
+#include <mpi.h>
 #include <Eigen/Core>
 
 namespace nnp
@@ -50,6 +51,19 @@ class KalmanFilter : public Updater
     /** Destructor.
     */
    virtual ~KalmanFilter();
+    /** Basic MPI setup.
+     *
+     * @param[in] communicator Input communicator of calling program.
+     */
+    void setupMPI(MPI_Comm* communicator);
+    /** Set up Kalman filter decoupling.
+     *
+     * @param[in] groupLimits Vector containing starting and ending indices
+     *                        of each decoupling group.
+     */
+    void setupDecoupling(
+             std::vector<std::pair<std::size_t, std::size_t>> groupLimits);
     /** Set observation vector size.
      *
      * @param[in] size Size of the observation vector.
@@ -194,10 +208,16 @@ class KalmanFilter : public Updater
 private:
     /// Kalman filter type.
     KalmanType type;
+    /// My process ID.
+    int myRank;
+    /// Total number of MPI processors.
+    int numProcs;
     /// Size of observation (measurement) vector.
     std::size_t sizeObservation;
     /// Total number of updates performed.
     std::size_t numUpdates;
+    /// Number of covariance matrix entries in memory (sum over all groups).
+    std::size_t sizeP;
     /// Error covariance initialization parameter @f$\epsilon@f$.
     double epsilon;
     /// Process noise @f$q@f$.
@@ -222,18 +242,34 @@ class KalmanFilter : public Updater
     double nu;
     /// Forgetting gain factor gamma for fading memory Kalman filter.
     double gamma;
+    /// Size of state vector segment handled by each processor.
+    std::vector<int> sizeStatePerProc;
+    /// Offset of state vector segment handled by each processor.
+    std::vector<int> offsetStatePerProc;
+    /// List of (group index, group size) pairs of this processor.
+    std::vector<std::pair<
+    std::size_t, std::size_t>> myGroups;
+    /// Number of groups for each processor.
+    std::vector<int> numGroupsPerProc;
+    /// Offset in terms of groups for each processor.
+    std::vector<int> groupOffsetPerProc;
+    /// Decoupling group limits, i.e. starting and ending indices.
+    std::vector<std::pair<
+    std::size_t, std::size_t>> groupLimits;
     /// State vector.
     Eigen::Map<Eigen::VectorXd>* w;
     /// Error vector.
     Eigen::Map<Eigen::VectorXd const>* xi;
     /// Derivative matrix.
     Eigen::Map<Eigen::MatrixXd const>* H;
-    /// Error covariance matrix.
-    Eigen::MatrixXd P;
+    /// Error covariance matrices for all groups.
+    std::vector<Eigen::MatrixXd> P;
     /// Kalman gain matrix.
-    Eigen::MatrixXd K;
+    std::vector<Eigen::MatrixXd> K;
     /// Intermediate result X = P . H.
-    Eigen::MatrixXd X;
+    std::vector<Eigen::MatrixXd> X;
+    /// Global MPI communicator.
+    MPI_Comm comm;
 };
 
 //////////////////////////////////
diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp
index ba293c7d0..3b334d72d 100644
--- a/src/libnnptrain/Training.cpp
+++ b/src/libnnptrain/Training.cpp
@@ -40,6 +40,7 @@ Training::Training() : Dataset(),
                        jacobianMode   (JM_SUM     ),
                        updateStrategy (US_COMBINED),
                        selectionMode  (SM_RANDOM  ),
+                       decouplingType (DT_GLOBAL  ),
                        hasUpdaters    (false      ),
                        hasStructures  (false      ),
                        useForces      (false      ),
@@ -300,7 +301,13 @@ void Training::initializeWeights()
             w.at(i).at(j) = minWeights + gsl_rng_uniform(rngGlobal)
                           * (maxWeights - minWeights);
         }
-        elements.at(i).neuralNetwork->setConnections(&(w.at(i).front()));
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+        // To be consistent this should actually be the non-AO version,
+        // but this way results stay comparable.
+        elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#else
+        elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#endif
     }
     if (settings.keywordExists("nguyen_widrow_weights_short"))
     {
@@ -330,7 +337,7 @@ void Training::initializeWeights()
              it != elements.end(); ++it)
         {
             it->neuralNetwork->
-                modifyConnections(NeuralNetwork::MS_GLOROTBENGIO);
+                modifyConnections(NeuralNetwork::MS_GLOROTBENGIO_UNIFORM);
             //it->neuralNetwork->
             //    modifyConnections(NeuralNetwork::MS_ZEROOUTPUTWEIGHTS);
             it->neuralNetwork->
@@ -991,6 +998,44 @@ void Training::setupTraining()
     {
         kalmanType = (KalmanFilter::KalmanType)
                      atoi(settings["kalman_type"].c_str());
+        decouplingType = (DecouplingType)
+                         atoi(settings["decoupling_type"].c_str());
+        if (decouplingType == DT_GLOBAL)
+        {
+            log << strpr("No decoupling (GEKF) selected: DT_GLOBAL (%d).\n",
+                         decouplingType);
+        }
+        else if (decouplingType == DT_ELEMENT)
+        {
+            log << strpr("Per-element decoupling (ED-GEKF) selected: "
+                         "DT_ELEMENT (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_LAYER)
+        {
+            log << strpr("Per-layer decoupling selected: "
+                         "DT_LAYER (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_NODE)
+        {
+            log << strpr("Per-node decoupling (NDEKF) selected: "
+                         "DT_NODE (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_FULL)
+        {
+            log << strpr("Full (per-weight) decoupling selected: "
+                         "DT_FULL (%d).\n", decouplingType);
+        }
+        else
+        {
+            throw runtime_error("ERROR: Unknown Kalman filter decoupling "
+                                "type.\n");
+        }
+        if (decouplingType != DT_GLOBAL && updateStrategy != US_COMBINED)
+        {
+            throw runtime_error(strpr("ERROR: Kalman filter decoupling works "
+                                      "only in conjunction with "
+                                      "\"update_strategy %d\".\n",
+                                      US_COMBINED));
+        }
     }
 
     for (size_t i = 0; i < numUpdaters; ++i)
@@ -1008,6 +1053,81 @@ void Training::setupTraining()
             updaters.push_back(
                 (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i),
                                            kalmanType));
+            KalmanFilter* u = dynamic_cast<KalmanFilter*>(updaters.back());
+            if (parallelMode == PM_TRAIN_ALL)
+            {
+                u->setupMPI(&comm);
+            }
+            else
+            {
+                MPI_Group groupWorld;
+                MPI_Comm_group(comm, &groupWorld);
+                int const size0 = 1;
+                int const rank0[1] = {0};
+                MPI_Group groupRank0;
+                MPI_Group_incl(groupWorld, size0, rank0, &groupRank0);
+                MPI_Comm commRank0;
+                MPI_Comm_create_group(comm, groupRank0, 0, &commRank0);
+                u->setupMPI(&commRank0);
+            }
+            vector<pair<size_t, size_t>> limits;
+            if (decouplingType == DT_GLOBAL)
+            {
+                limits.push_back(
+                    make_pair(0, numWeightsPerUpdater.at(i) - 1));
+            }
+            else if (decouplingType == DT_ELEMENT)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    limits.push_back(make_pair(
+                        weightsOffset.at(j),
+                        weightsOffset.at(j) + elements.at(j).
+                        neuralNetwork->getNumConnections() - 1));
+                }
+            }
+            else if (decouplingType == DT_LAYER)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getLayerLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+            }
+            else if (decouplingType == DT_NODE)
+            {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getNeuronLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+#else
+                throw runtime_error("ERROR: Node-decoupled Kalman filter "
+                                    "(NDEKF) training not possible with "
+                                    "alternative (old) weight memory "
+                                    "layout, recompile without "
+                                    "-DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+            }
+            else if (decouplingType == DT_FULL)
+            {
+                for (size_t j = 0; j < numWeightsPerUpdater.at(i); ++j)
+                {
+                    limits.push_back(make_pair(j, j));
+                }
+            }
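+            // Group layout example (hypothetical two-element system with
+            // identical networks): DT_ELEMENT yields two groups, one per
+            // element block of the combined weight vector; DT_FULL yields
+            // one group per weight, i.e. a purely diagonal covariance.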
+            u->setupDecoupling(limits);
         }
         updaters.back()->setState(&(weights.at(i).front()));
     }
@@ -1432,7 +1552,7 @@ void Training::calculateErrorEpoch()
         fileNameForcesTest = strpr("testforces.%06zu.out", epoch);
     }
 
-    // Calculate RMSE and write comparison files.
+    // Calculate error and write comparison files.
     calculateError(true,
                    identifier,
                    fileNameEnergiesTrain,
@@ -1452,7 +1572,9 @@ void Training::writeWeights(string const fileNameFormat) const
         string fileName = strpr(fileNameFormat.c_str(),
                                 elements.at(i).getAtomicNumber());
         file.open(fileName.c_str());
-        elements.at(i).neuralNetwork->writeConnections(file);
+        // Attention: need alternative (old) ordering scheme here for
+        // backward compatibility!
+        elements.at(i).neuralNetwork->writeConnectionsAO(file);
         file.close();
     }
 
@@ -1629,7 +1751,7 @@ void Training::writeNeuronStatistics(string const fileName) const
     vector<string> colInfo;
     vector<size_t> colSize;
     title.push_back("Statistics for individual neurons gathered during "
-                    "RMSE calculation.");
+                    "error calculation.");
     colSize.push_back(10);
     colName.push_back("element");
     colInfo.push_back("Element index.");
@@ -1743,24 +1865,31 @@ void Training::writeUpdaterStatus(bool append,
 
     for (size_t i = 0; i < numUpdaters; ++i)
     {
-        string fileName;
-        if (updateStrategy == US_COMBINED)
-        {
-            fileName = strpr(fileNameFormat.c_str(), 0);
-        }
-        else if (updateStrategy == US_ELEMENT)
+        if (myRank == 0)
         {
-            fileName = strpr(fileNameFormat.c_str(),
-                             elementMap.atomicNumber(i));
+            string fileName;
+            if (updateStrategy == US_COMBINED)
+            {
+                fileName = strpr(fileNameFormat.c_str(), 0);
+            }
+            else if (updateStrategy == US_ELEMENT)
+            {
+                fileName = strpr(fileNameFormat.c_str(),
+                                 elementMap.atomicNumber(i));
+            }
+            if (append) file.open(fileName.c_str(), ofstream::app);
+            else
+            {
+                file.open(fileName.c_str());
+                appendLinesToFile(file, updaters.at(i)->statusHeader());
+            }
+            file << updaters.at(i)->status(epoch);
+            file.close();
         }
-        if (append) file.open(fileName.c_str(), ofstream::app);
-        else
+        else if (parallelMode == PM_TRAIN_ALL)
         {
-            file.open(fileName.c_str());
-            appendLinesToFile(file, updaters.at(i)->statusHeader());
+            updaters.at(i)->status(epoch);
         }
-        file << updaters.at(i)->status(epoch);
-        file.close();
     }
 
     return;
@@ -1884,7 +2013,7 @@ void Training::loop()
     else if (errorMetricForces == 1) metric = "MAE";
     if (errorMetricEnergies % 2 == 0) peratom = "per atom ";
 
-    log << "The training loop output covers different RMSEs, update and\n";
+    log << "The training loop output covers different errors, update and\n";
    log << "timing information. The following quantities are organized\n";
     log << "according to the matrix scheme below:\n";
     log << "-------------------------------------------------------------------\n";
@@ -1911,7 +2040,7 @@ void Training::loop()
     log << "Abbreviations:\n";
     log << "  p. u. = physical units.\n";
     log << "  i. u. = internal units.\n";
-    log << "Note: RMSEs in internal units (columns 5 + 6) are only present \n";
+    log << "Note: Errors in internal units (columns 5 + 6) are only present \n";
     log << "      if data set normalization is used.\n";
     log << "-------------------------------------------------------------------\n";
     log << " 1 2 3 4 5 6\n";
@@ -1940,8 +2069,9 @@ void Training::loop()
     // Write learning curve.
     if (myRank == 0) writeLearningCurve(false);
 
-    // Write updater status to file.
-    if (myRank == 0) writeUpdaterStatus(false);
+    // Write updater status to file (decoupled KF requires all tasks to
+    // execute the status() function).
+    writeUpdaterStatus(false);
 
     // Write neuron statistics.
     writeNeuronStatisticsEpoch();
@@ -2013,7 +2143,7 @@ void Training::loop()
         if (myRank == 0) writeLearningCurve(true);
 
         // Write updater status to file.
-        if (myRank == 0) writeUpdaterStatus(true);
+        writeUpdaterStatus(true);
 
         // Write neuron statistics.
         writeNeuronStatisticsEpoch();
@@ -2280,6 +2410,22 @@ void Training::update(bool force)
             else
             {
                 nn->calculateDEdc(&(dXdc.at(i).front()));
+                //vector<double> temp(dXdc.at(i).size(), 0.0);
+                //vector<double> tempAO(dXdc.at(i).size(), 0.0);
+                //nn->calculateDEdc(&(temp.front()));
+                //ofstream tf("dedc.out");
+                //size_t count;
+                //count = 0;
+                //sort(temp.begin(), temp.end());
+                //for (auto i : temp) tf << strpr("%zu %16.8E\n", count++, i);
+                //tf.close();
+                //nn->calculateDEdcAO(&(tempAO.front()));
+                //tf.open("dedc.out.ao");
+                //count = 0;
+                //sort(tempAO.begin(), tempAO.end());
+                //for (auto i : tempAO) tf << strpr("%zu %16.8E\n", count++, i);
+                //tf.close();
+                //throw runtime_error("END HERE");
             }
 
             // Finally sum up Jacobian.
             if (updateStrategy == US_ELEMENT) iu = i;
@@ -2715,7 +2861,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(0).at(pos)));
+#else
+            nn->getConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2724,7 +2874,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(i).front()));
+#else
+            nn->getConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
 
@@ -2739,7 +2893,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(0).at(pos)));
+#else
+            nn->setConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2748,7 +2906,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(i).front()));
+#else
+            nn->setConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
 
diff --git a/src/libnnptrain/Training.h b/src/libnnptrain/Training.h
index 1ae678255..93022ebc8 100644
--- a/src/libnnptrain/Training.h
+++ b/src/libnnptrain/Training.h
@@ -110,6 +110,21 @@ class Training : public Dataset
         SM_THRESHOLD
     };
 
+    /// Kalman filter decoupling type (requires UT_KF and US_COMBINED).
+    enum DecouplingType
+    {
+        /// No decoupling, global extended Kalman filter (GEKF).
+        DT_GLOBAL,
+        /// Per-element decoupling, ED-GEKF (doi.org/10.1021/acs.jctc.5b00211).
+        DT_ELEMENT,
+        /// Layer decoupling.
+        DT_LAYER,
+        /// Node decoupling, NDEKF.
+        DT_NODE,
+        /// Full decoupling, FDEKF.
+        DT_FULL
+    };
+
     /// Constructor.
     Training();
     /// Destructor, updater vector needs to be cleaned.
@@ -309,6 +324,8 @@ class Training : public Dataset
     UpdateStrategy updateStrategy;
     /// Selection mode for update candidates.
     SelectionMode selectionMode;
+    /// Kalman filter decoupling type if KF training selected.
+    DecouplingType decouplingType;
     /// If this rank performs weight updates.
     bool hasUpdaters;
     /// If this rank holds structure information.
diff --git a/src/makefile.gnu b/src/makefile.gnu
index 223a03485..ece0147ce 100644
--- a/src/makefile.gnu
+++ b/src/makefile.gnu
@@ -56,3 +56,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.intel b/src/makefile.intel
index 17e61b7a0..4a9a0e94c 100644
--- a/src/makefile.intel
+++ b/src/makefile.intel
@@ -57,3 +57,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.llvm b/src/makefile.llvm
index b748ce08b..30deb9a03 100644
--- a/src/makefile.llvm
+++ b/src/makefile.llvm
@@ -58,3 +58,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/test/cpp/ExampleNeuralNetworx.h b/test/cpp/ExampleNeuralNetworx.h
new file mode 100644
index 000000000..b88cda8ee
--- /dev/null
+++ b/test/cpp/ExampleNeuralNetworx.h
@@ -0,0 +1,215 @@
+#ifndef EXAMPLENEURALNETWORX_H
+#define EXAMPLENEURALNETWORX_H
+
+#include <string>
+#include <vector>
+#include "Example.h"
+#include "BoostDataContainer.h"
+#include "NeuralNetworx.h"
+
+struct ExampleNeuralNetworx : public Example
+{
+    std::size_t numLayers;
+    std::size_t numConnections;
+    std::size_t numWeights;
+    std::size_t numBiases;
+    std::vector<std::size_t> numNeuronsPerLayer;
+    std::vector<std::pair<
+    std::size_t, std::size_t>> limitsLayers;
+    std::vector<std::pair<
+    std::size_t, std::size_t>> limitsNeurons;
+    std::vector<nnp::NeuralNetworx::Activation> activationPerLayer;
+    std::vector<std::string> activationStringPerLayer;
+
+    ExampleNeuralNetworx();
+
+    ExampleNeuralNetworx(std::string name) : numLayers     (0),
+                                             numConnections(0),
+                                             numWeights    (0),
+                                             numBiases     (0)
+    {
+        this->name = name;
+        this->description = std::string("NeuralNetworx example \"")
+                          + this->name
+                          + "\"";
+    }
+};
+
+template<>
+void BoostDataContainer<ExampleNeuralNetworx>::setup()
+{
+    ExampleNeuralNetworx* e = nullptr;
+
+    examples.push_back(ExampleNeuralNetworx("3-layer, single output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {2, 3, 1};
+    e->activationStringPerLayer = {"l", "t", "p"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::TANH,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS};
+    e->numLayers = 3;
+    e->numConnections = 13;
+    e->numWeights = 9;
+    e->numBiases = 4;
+    e->limitsLayers.push_back(std::make_pair(0, 8));
+    e->limitsLayers.push_back(std::make_pair(9, 12));
+    e->limitsNeurons.push_back(std::make_pair(0, 2));
+    e->limitsNeurons.push_back(std::make_pair(3, 5));
+    e->limitsNeurons.push_back(std::make_pair(6, 8));
+    e->limitsNeurons.push_back(std::make_pair(9, 12));
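+    // Consistency check for this 2-3-1 example: 2*3 + 3 = 9 connections into
+    // the hidden layer plus 3*1 + 1 = 4 into the output layer give 13
+    // connections total (9 weights, 4 biases), matching the numbers above.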
+
+    examples.push_back(ExampleNeuralNetworx("4-layer, single output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {10, 20, 15, 1};
+    e->activationStringPerLayer = {"l", "t", "p", "l"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::TANH,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS,
+                             nnp::NeuralNetworx::Activation::IDENTITY};
+    e->numLayers = 4;
+    e->numConnections = 551;
+    e->numWeights = 515;
+    e->numBiases = 36;
+    e->limitsLayers.push_back(std::make_pair(0, 219));
+    e->limitsLayers.push_back(std::make_pair(220, 534));
+    e->limitsLayers.push_back(std::make_pair(535, 550));
+    e->limitsNeurons.push_back(std::make_pair(0, 10)); // layer 2
+    e->limitsNeurons.push_back(std::make_pair(11, 21));
+    e->limitsNeurons.push_back(std::make_pair(22, 32));
+    e->limitsNeurons.push_back(std::make_pair(33, 43));
+    e->limitsNeurons.push_back(std::make_pair(44, 54));
+    e->limitsNeurons.push_back(std::make_pair(55, 65));
+    e->limitsNeurons.push_back(std::make_pair(66, 76));
+    e->limitsNeurons.push_back(std::make_pair(77, 87));
+    e->limitsNeurons.push_back(std::make_pair(88, 98));
+    e->limitsNeurons.push_back(std::make_pair(99, 109));
+    e->limitsNeurons.push_back(std::make_pair(110, 120));
+    e->limitsNeurons.push_back(std::make_pair(121, 131));
+    e->limitsNeurons.push_back(std::make_pair(132, 142));
+    e->limitsNeurons.push_back(std::make_pair(143, 153));
+    e->limitsNeurons.push_back(std::make_pair(154, 164));
+    e->limitsNeurons.push_back(std::make_pair(165, 175));
+    e->limitsNeurons.push_back(std::make_pair(176, 186));
+    e->limitsNeurons.push_back(std::make_pair(187, 197));
+    e->limitsNeurons.push_back(std::make_pair(198, 208));
+    e->limitsNeurons.push_back(std::make_pair(209, 219));
+    e->limitsNeurons.push_back(std::make_pair(220, 240)); // layer 3
+    e->limitsNeurons.push_back(std::make_pair(241, 261));
+    e->limitsNeurons.push_back(std::make_pair(262, 282));
+    e->limitsNeurons.push_back(std::make_pair(283, 303));
+    e->limitsNeurons.push_back(std::make_pair(304, 324));
+    e->limitsNeurons.push_back(std::make_pair(325, 345));
+    e->limitsNeurons.push_back(std::make_pair(346, 366));
+    e->limitsNeurons.push_back(std::make_pair(367, 387));
+    e->limitsNeurons.push_back(std::make_pair(388, 408));
+    e->limitsNeurons.push_back(std::make_pair(409, 429));
+    e->limitsNeurons.push_back(std::make_pair(430, 450));
+    e->limitsNeurons.push_back(std::make_pair(451, 471));
+    e->limitsNeurons.push_back(std::make_pair(472, 492));
+    e->limitsNeurons.push_back(std::make_pair(493, 513));
+    e->limitsNeurons.push_back(std::make_pair(514, 534));
+    e->limitsNeurons.push_back(std::make_pair(535, 550)); // layer 4
+
+    examples.push_back(ExampleNeuralNetworx("10-layer, multiple output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {25, 20, 15, 30, 10, 4, 12, 13, 7, 5};
+    e->activationStringPerLayer = {"l", "c", "e", "g", "h",
+                                   "s", "r", "S", "p", "t"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::COS,
+                             nnp::NeuralNetworx::Activation::EXP,
+                             nnp::NeuralNetworx::Activation::GAUSSIAN,
+                             nnp::NeuralNetworx::Activation::HARMONIC,
+                             nnp::NeuralNetworx::Activation::LOGISTIC,
+                             nnp::NeuralNetworx::Activation::RELU,
+                             nnp::NeuralNetworx::Activation::REVLOGISTIC,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS,
+                             nnp::NeuralNetworx::Activation::TANH};
+    e->numLayers = 10;
+    e->numConnections = 2036;
+    e->numWeights = 1920;
+    e->numBiases = 116;
+
+    return;
+}
+
+struct ExampleActivation : public Example
+{
+    std::string activation;
+    std::vector<double> x;
+    std::vector<double> f;
+
+    ExampleActivation();
+
+    ExampleActivation(std::string name)
+    {
+        this->name = name;
+        this->description = std::string("Activation example \"")
+                          + this->name
+                          + "\"";
+    }
+};
+
+template<>
+void BoostDataContainer<ExampleActivation>::setup()
+{
+    ExampleActivation* e = nullptr;
+
+    examples.push_back(ExampleActivation("Linear \"l\""));
+    e = &(examples.back());
+    e->activation = "l";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {-1.5, -0.7, 0.0, 0.7, 1.5};
+
+    examples.push_back(ExampleActivation("Hyperbolic Tangent \"t\""));
+    e = &(examples.back());
+    e->activation = "t";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {-9.0514825364486640E-01, -6.0436777711716361E-01, 0.0,
+            6.0436777711716361E-01, 9.0514825364486640E-01};
+
+    examples.push_back(ExampleActivation("Logistic \"s\""));
+    e = &(examples.back());
+    e->activation = "s";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {1.8242552380635635E-01, 3.3181222783183389E-01, 5.0E-01,
+            6.6818777216816616E-01, 8.1757447619364365E-01};
+
+    examples.push_back(ExampleActivation("Softplus \"p\""));
+    e = &(examples.back());
+    e->activation = "p";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {2.0141327798275246E-01, 4.0318604888545784E-01,
+            6.9314718055994529E-01, 1.1031860488854579E+00,
+            1.7014132779827524E+00};
+
+    examples.push_back(ExampleActivation("ReLU \"r\""));
+    e = &(examples.back());
+    e->activation = "r";
+    e->x = {-1.5, -0.7, -0.1, 0.1, 0.7, 1.5};
+    e->f = {0.0, 0.0, 0.0, 0.1, 0.7, 1.5};
+
+    examples.push_back(ExampleActivation("Gaussian \"g\""));
+    e = &(examples.back());
e->activation = "g"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {3.2465246735834974E-01, 7.8270453824186814E-01, 1.0E+00, 7.8270453824186814E-01, 3.2465246735834974E-01}; + + examples.push_back(ExampleActivation("Reverse Logistic \"S\"")); + e = &(examples.back()); + e->activation = "S"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {8.1757447619364365E-01, 6.6818777216816616E-01, 5.0E-01, 3.3181222783183384E-01, 1.8242552380635635E-01}; + + examples.push_back(ExampleActivation("Exponential \"e\"")); + e = &(examples.back()); + e->activation = "e"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {4.4816890703380645E+00, 2.0137527074704766E+00, 1.0E+00, 4.9658530379140953E-01, 2.2313016014842982E-01}; + + examples.push_back(ExampleActivation("Harmonic \"h\"")); + e = &(examples.back()); + e->activation = "h"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {2.25E+00, 4.8999999999999994E-01, 0.0, 4.8999999999999994E-01, 2.25E+00}; + + return; +} + +#endif diff --git a/test/cpp/Example_nnp_train.h b/test/cpp/Example_nnp_train.h index fe4aa3d22..88ed24a76 100644 --- a/test/cpp/Example_nnp_train.h +++ b/test/cpp/Example_nnp_train.h @@ -28,13 +28,13 @@ void BoostDataContainer::setup() e->rmseForcesTrain = 3.77302629E-02; e->rmseForcesTest = 2.25135617E-01; - //examples.push_back(Example_nnp_train("H2O_RPBE-D3")); - //e = &(examples.back()); - //e->lastEpoch = 10; - //e->rmseEnergyTrain = 1.02312914E-04; - //e->rmseEnergyTest = 5.28837597E-04; - //e->rmseForcesTrain = 1.67069652E-02; - //e->rmseForcesTest = 1.07708383E-02; + examples.push_back(Example_nnp_train("H2O_RPBE-D3")); + e = &(examples.back()); + e->lastEpoch = 5; + e->rmseEnergyTrain = 9.19395999E-04; + e->rmseEnergyTest = 7.42520683E-04; + e->rmseForcesTrain = 2.40682415E-02; + e->rmseForcesTest = 1.44776060E-02; return; } diff --git a/test/cpp/makefile b/test/cpp/makefile index 9fcc5f411..3ac311611 100644 --- a/test/cpp/makefile +++ b/test/cpp/makefile @@ -34,10 +34,10 @@ OPTIONS+=$(PROJECT_OPTIONS) DEBUG=$(PROJECT_DEBUG) $(PROJECT_TEST) # Extra include paths for compiling. -INCLUDES=-I./ -I$(PROJECT_DIR)/src/libnnp -I$(PROJECT_DIR)/src/libnnptrain +INCLUDES=-I./ -I$(PROJECT_DIR)/src/libnnp -I$(PROJECT_DIR)/src/libnnptrain -I$(PROJECT_EIGEN) # Extra flags for linking. 
-LDFLAGS=$(PROJECT_LIB)/libnnp.a $(PROJECT_LIB)/libnnptrain.a -lboost_system -lboost_unit_test_framework -lboost_filesystem
+LDFLAGS=$(PROJECT_LIB)/libnnp.a $(PROJECT_LIB)/libnnptrain.a -lboost_system -lboost_unit_test_framework -lboost_filesystem $(PROJECT_LDFLAGS_BLAS)
 
 ###############################################################################
diff --git a/test/cpp/test_NeuralNetworx.cpp b/test/cpp/test_NeuralNetworx.cpp
new file mode 100644
index 000000000..1b9b50df6
--- /dev/null
+++ b/test/cpp/test_NeuralNetworx.cpp
@@ -0,0 +1,306 @@
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE NeuralNetworx
+#include <boost/test/unit_test.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/filesystem.hpp>
+#include "fileHelpers.h"
+#include "ExampleNeuralNetworx.h"
+
+#include "NeuralNetworx.h"
+#include "utility.h"
+#include <algorithm> // std::fill, std::generate
+#include <cstddef>   // std::size_t
+#include <limits>
+#include <numeric>   // std::iota
+#include <random>    // std::mt19937_64
+#include <string>    // std::string
+#include <vector>    // std::vector
+
+using namespace std;
+using namespace nnp;
+
+namespace bdata = boost::unit_test::data;
+namespace bfs = boost::filesystem;
+
+double const accuracy = 10.0 * numeric_limits<double>::epsilon();
+double const delta = 1.0E-5;
+
+BoostDataContainer<ExampleNeuralNetworx> containerNN;
+BoostDataContainer<ExampleActivation> containerActivation;
+
+BOOST_AUTO_TEST_SUITE(UnitTests)
+
+BOOST_DATA_TEST_CASE(Initialize_CorrectNetworkArchitecture,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    BOOST_REQUIRE_EQUAL(nn.getNumLayers(), example.numLayers);
+    BOOST_REQUIRE_EQUAL(nn.getNumConnections(), example.numConnections);
+    BOOST_REQUIRE_EQUAL(nn.getNumWeights(), example.numWeights);
+    BOOST_REQUIRE_EQUAL(nn.getNumBiases(), example.numBiases);
+}
+
+BOOST_DATA_TEST_CASE(InitializeWithStrings_CorrectNetworkArchitecture,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer,
+                     example.activationStringPerLayer);
+
+    BOOST_REQUIRE_EQUAL(nn.getNumLayers(), example.numLayers);
+    BOOST_REQUIRE_EQUAL(nn.getNumConnections(), example.numConnections);
+    BOOST_REQUIRE_EQUAL(nn.getNumWeights(), example.numWeights);
+    BOOST_REQUIRE_EQUAL(nn.getNumBiases(), example.numBiases);
+}
+
+BOOST_DATA_TEST_CASE(SetAndGetConnections_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co(nn.getNumConnections(), 0);
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnections(ci);
+    nn.getConnections(co);
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+}
+
+BOOST_DATA_TEST_CASE(SetAndGetConnectionsAO_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co(nn.getNumConnections(), 0);
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnectionsAO(ci);
+    nn.getConnectionsAO(co);
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+}
+
+BOOST_DATA_TEST_CASE(SetAndWriteConnections_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co;
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnections(ci);
+    ofstream ofile;
+    ofile.open("weights.data");
+    nn.writeConnections(ofile);
+    ofile.close();
+    ifstream ifile;
+    ifile.open("weights.data");
+    BOOST_REQUIRE(ifile.is_open());
+    string line;
+    while (getline(ifile, line))
+    {
+        if (line.at(0) != '#')
+        {
+            vector<string> splitLine = split(reduce(line));
+            co.push_back(atof(splitLine.at(0).c_str()));
+        }
+    }
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+    bfs::remove_all("weights.data");
+}
+
+BOOST_DATA_TEST_CASE(SetAndWriteConnectionsAO_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co;
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnectionsAO(ci);
+    ofstream ofile;
+    ofile.open("weights.data");
+    nn.writeConnectionsAO(ofile);
+    ofile.close();
+    ifstream ifile;
+    ifile.open("weights.data");
+    BOOST_REQUIRE(ifile.is_open());
+    string line;
+    while (getline(ifile, line))
+    {
+        if (line.at(0) != '#')
+        {
+            vector<string> splitLine = split(reduce(line));
+            co.push_back(atof(splitLine.at(0).c_str()));
+        }
+    }
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+    bfs::remove_all("weights.data");
+}
+
+BOOST_DATA_TEST_CASE(SingleNeuronActivation_AnalyticNumericDerivativesMatch,
+                     bdata::make(containerActivation.examples),
+                     example)
+{
+    // Set up NN with only two neurons.
+    NeuralNetworx nn(vector<size_t>{1, 1},
+                     vector<string>{"l", example.activation});
+    nn.setConnections(vector<double>{1.0, 0.0});
+
+    for (size_t i = 0; i < example.x.size(); ++i)
+    {
+        nn.propagate(vector<double>{example.x.at(i)});
+        vector<double> output;
+        nn.getOutput(output);
+        BOOST_TEST_INFO(example.name + " x(" << i
+                        << ") = " << example.x.at(i));
+        BOOST_REQUIRE_SMALL(output.at(0) - example.f.at(i), accuracy);
+
+        nn.propagate(vector<double>{example.x.at(i) - delta});
+        double const yLeft = nn.getNeuronProperties(1, 0).at("y");
+
+        nn.propagate(vector<double>{example.x.at(i) + delta});
+        double const yRight = nn.getNeuronProperties(1, 0).at("y");
+
+        nn.propagate(vector<double>{example.x.at(i)});
+        auto yCenterProperties = nn.getNeuronProperties(1, 0);
+        double const y = yCenterProperties.at("y");
+        BOOST_REQUIRE_SMALL(y - example.f.at(i), accuracy);
+
+        double const dydx = yCenterProperties.at("dydx");
+        double const dydxNum = (yRight - yLeft) / (2.0 * delta);
+        BOOST_REQUIRE_SMALL(dydx - dydxNum, 10 * delta);
+
+        double const d2ydx2 = yCenterProperties.at("d2ydx2");
+        double const d2ydx2Num = (yRight - 2.0 * y + yLeft) / (delta * delta);
+
+        //cout << strpr("yl        : %23.16E\n", yLeft);
+        //cout << strpr("y         : %23.16E\n", y);
+        //cout << strpr("f         : %23.16E\n", example.f.at(i));
+        //cout << strpr("yr        : %23.16E\n", yRight);
+        //cout << strpr("yr-2y+yl  : %23.16E\n", yRight - 2.0 * y + yLeft);
+        //cout << strpr("delta**2  : %23.16E\n", delta * delta);
+        //cout << "\n";
+        //cout << strpr("dydx      : %23.16E\n", dydx);
+        //cout << strpr("dydxNum   : %23.16E\n", dydxNum);
+        //cout << "\n";
+        //cout << strpr("d2ydx2    : %23.16E\n", d2ydx2);
+        //cout << strpr("d2ydx2Num : %23.16E\n", d2ydx2Num);
+
+        BOOST_REQUIRE_SMALL(d2ydx2 - d2ydx2Num, 10 * delta);
+    }
+}
+
+BOOST_AUTO_TEST_CASE(ComputeInputDerivative_AnalyticNumericDerivativesMatch)
+{
+    size_t numInput = 3;
+    size_t numOutput = 4;
+    NeuralNetworx nn(vector<size_t>{numInput, 4, 5, numOutput},
+                     vector<string>{"l", "p", "p", "p"});
+
+    mt19937_64 rng(0);
+    uniform_real_distribution<> distribution(-1.0, 1.0);
+    auto generator = [&distribution, &rng]() { return distribution(rng); };
+
+    vector<double> connections(nn.getNumConnections());
+    generate(connections.begin(), connections.end(), generator);
+    nn.setConnections(connections);
+
+    vector<double> input(numInput);
+    generate(input.begin(), input.end(), generator);
+    nn.setInput(input);
+
+    nn.propagate(true);
+    vector<vector<double>> derivInput;
+    nn.getDerivInput(derivInput);
+
+    vector<double> outputLeft;
+    vector<double> outputRight;
+    vector<vector<double>> derivInputNum(derivInput.size());
+    for (size_t i = 0; i < numInput; ++i)
+    {
+        vector<double> tmpInput = input;
+        tmpInput.at(i) -= delta;
+        nn.setInput(tmpInput);
+        nn.propagate();
+        nn.getOutput(outputLeft);
+
+        tmpInput.at(i) += 2.0 * delta;
+        nn.setInput(tmpInput);
+        nn.propagate();
+        nn.getOutput(outputRight);
+        for (size_t o = 0; o < numOutput; ++o)
+        {
+            derivInputNum.at(o).push_back(
+                (outputRight.at(o) - outputLeft.at(o)) / (2.0 * delta));
+        }
+    }
+
+    for (size_t o = 0; o < numOutput; ++o)
+    {
+        for (size_t i = 0; i < numInput; ++i)
+        {
+            BOOST_TEST_INFO("dy_out(" << o << ")/dy_in(" << i << ")");
+            BOOST_REQUIRE_SMALL(derivInput.at(o).at(i)
+                                - derivInputNum.at(o).at(i),
+                                10 * delta);
+        }
+    }
+
+    //for (auto l : nn.info()) cout << l;
+
+    //for (auto on : derivInputNum)
+    //{
+    //    for (auto in : on)
+    //    {
+    //        cout << in << " ";
+    //    }
+    //    cout << "\n";
+    //}
+
+    //cout << "\n";
+
+    //for (auto on : derivInput)
+    //{
+    //    for (auto in : on)
+    //    {
+    //        cout << in << " ";
+    //    }
+    //    cout << "\n";
+    //}
+}
+
+BOOST_DATA_TEST_CASE(GetLayerLimits_CorrectLimits,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+    auto limits = nn.getLayerLimits();
+    if (example.limitsLayers.size() > 0)
+    {
+        BOOST_REQUIRE_EQUAL(limits.size(), example.limitsLayers.size());
+    }
+}
+
+BOOST_DATA_TEST_CASE(GetNeuronLimits_CorrectLimits,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+    auto limits = nn.getNeuronLimits();
+    if (example.limitsNeurons.size() > 0)
+    {
+        //BOOST_REQUIRE_EQUAL(limits.size(), example.limitsNeurons.size());
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()