diff --git a/examples/input.nn.recommended b/examples/input.nn.recommended
index e53934e0f..a0030bf52 100644
--- a/examples/input.nn.recommended
+++ b/examples/input.nn.recommended
@@ -35,6 +35,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/Cu2S_PBE/input.nn b/examples/nnp-train/Cu2S_PBE/input.nn
index b4aa9333e..23eccf6db 100644
--- a/examples/nnp-train/Cu2S_PBE/input.nn
+++ b/examples/nnp-train/Cu2S_PBE/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/H2O_RPBE-D3/input.nn b/examples/nnp-train/H2O_RPBE-D3/input.nn
index b2666eecd..05b3fcacb 100644
--- a/examples/nnp-train/H2O_RPBE-D3/input.nn
+++ b/examples/nnp-train/H2O_RPBE-D3/input.nn
@@ -44,6 +44,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/examples/nnp-train/LJ/input.nn b/examples/nnp-train/LJ/input.nn
index d42118f4b..2a1812341 100644
--- a/examples/nnp-train/LJ/input.nn
+++ b/examples/nnp-train/LJ/input.nn
@@ -34,6 +34,7 @@ parallel_mode 0 # Training parallelization used (
 jacobian_mode 1 # Jacobian computation mode (0 = Summation to single gradient, 1 = Per-task summed gradient, 2 = Full Jacobian).
 update_strategy 0 # Update strategy (0 = Combined, 1 = Per-element).
 selection_mode 2 # Update candidate selection mode (0 = Random, 1 = Sort, 2 = Threshold).
+decoupling_type 0 # Kalman filter decoupling type (0 = GEKF, 1 = ED-GEKF, 2 = Layer, 3 = NDEKF, 4 = FDEKF).
 task_batch_size_energy 1 # Number of energy update candidates prepared per task for each update (0 = Entire training set).
 task_batch_size_force 1 # Number of force update candidates prepared per task for each update (0 = Entire training set).
 memorize_symfunc_results # Keep symmetry function results in memory.
diff --git a/src/application/makefile b/src/application/makefile
index c8a2b6529..29b260a37 100644
--- a/src/application/makefile
+++ b/src/application/makefile
@@ -47,7 +47,8 @@ APP_CORE=nnp-convert \
          nnp-predict \
          nnp-prune \
          nnp-select \
-         nnp-symfunc
+         nnp-symfunc \
+         nnx

 # Applications which require libnnp and libnnptrain:
 # (nnp-train itself is actually a special case).
@@ -55,7 +56,8 @@ APP_TRAIN=nnp-comp2 \
           nnp-dataset \
           nnp-norm \
           nnp-scaling \
-          nnp-sfclust
+          nnp-sfclust \
+          nnp-winit

 # All applications together.
 APP=$(APP_CORE) $(APP_TRAIN) nnp-train
@@ -72,7 +74,7 @@ CLEAN_APP=$(patsubst %, clean-%, $(APP))
 all: $(APP_CORE) $(APP_TRAIN) nnp-train

 # Applications which require only libnnp:
-$(APP_CORE): INCLUDES=-I./ -I$(PROJECT_INCLUDE)/
+$(APP_CORE): INCLUDES=-I./ -I$(PROJECT_INCLUDE)/ -I$(PROJECT_EIGEN)
 ifeq ($(MODE), shared)
 $(APP_CORE): LDFLAGS=-L$(PROJECT_LIB) -lnnp
 else
diff --git a/src/application/nnp-winit.cpp b/src/application/nnp-winit.cpp
new file mode 100644
index 000000000..285bb7612
--- /dev/null
+++ b/src/application/nnp-winit.cpp
@@ -0,0 +1,308 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#include "Log.h"
+#include "NeuralNetwork.h"
+#include "utility.h"
+#include <algorithm> // std::fill, std::generate
+#include <cmath>     // sqrt
+#include <cstdio>    // FILE, fopen, fclose
+#include <fstream>   // std::ofstream
+#include <iostream>  // std::cout
+#include <random>    // std::mt19937_64
+#include <string>    // std::string
+#include <vector>    // std::vector
+#include <gsl/gsl_histogram.h>
+
+using namespace std;
+using namespace nnp;
+
+int main(int argc, char* argv[])
+{
+    string activationString = "l";
+    int numNeuronsPerLayer = 10;
+    size_t numBins = 1000;
+    size_t numLayers = 7;
+    vector<double> meanActivation(numLayers, 0.0);
+    vector<double> sigmaActivation(numLayers, 0.0);
+    vector<size_t> countActivation(numLayers, 0);
+    vector<double> meanGradient(numLayers - 1, 0.0);
+    vector<double> sigmaGradient(numLayers - 1, 0.0);
+    vector<size_t> countGradient(numLayers - 1, 0);
+
+    mt19937_64 rng;
+    rng.seed(0);
+    uniform_real_distribution<> distUniform(-1.0, 1.0);
+    normal_distribution<> distNormal(0.0, 0.3);
+    auto generatorUniform = [&distUniform, &rng]() {return distUniform(rng);};
+    auto generatorNormal = [&distNormal, &rng]() {return distNormal(rng);};
+
+    if (argc != 2)
+    {
+        cout << "USAGE: " << argv[0] << " <type>\n"
+             << "       <type> ... Activation function type.\n";
+        return 1;
+    }
+
+    activationString = argv[1];
+
+    ofstream logFile;
+    logFile.open("nnp-winit.log");
+    Log log;
+    log.registerStreamPointer(&logFile);
+    log << "\n";
+    log << "*** WEIGHT INITIALIZATION TESTING *******"
+           "**************************************\n";
+    log << "\n";
+
+    NeuralNetworkTopology t;
+    t.numLayers = numLayers;
+    t.activationFunctionsPerLayer.resize(t.numLayers);
+    fill(t.activationFunctionsPerLayer.begin(),
+         t.activationFunctionsPerLayer.end() - 1,
+         activationFromString(activationString));
+    t.activationFunctionsPerLayer.back() = activationFromString("l");
+    t.numNeuronsPerLayer.resize(t.numLayers);
+    fill(t.numNeuronsPerLayer.begin(),
+         t.numNeuronsPerLayer.end() - 1,
+         numNeuronsPerLayer);
+    t.numNeuronsPerLayer.back() = 1;
+
+    NeuralNetwork nn(t.numLayers,
+                     t.numNeuronsPerLayer.data(),
+                     t.activationFunctionsPerLayer.data());
+
+    log << nn.info();
+
+    vector<gsl_histogram*> histActivation;
+    vector<gsl_histogram*> histGradient;
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        histActivation.push_back(gsl_histogram_alloc(numBins));
+        gsl_histogram_set_ranges_uniform(histActivation.back(), -1.0, 1.0);
+        if (i < numLayers - 1)
+        {
+            histGradient.push_back(gsl_histogram_alloc(numBins));
+            gsl_histogram_set_ranges_uniform(histGradient.back(), -1.0, 1.0);
+        }
+    }
+
+    for (size_t i = 0; i < 100000; ++i)
+    {
+        vector<double> input(numNeuronsPerLayer);
+        generate(input.begin(), input.end(), generatorNormal);
+        nn.setInput(input.data());
+
+        vector<double> connections(nn.getNumConnections());
+        generate(connections.begin(), connections.end(), generatorNormal);
+        nn.setConnections(connections.data());
+        nn.modifyConnections(NeuralNetwork::MS_GLOROTBENGIO_NORMAL);
+        //nn.modifyConnections(NeuralNetwork::MS_FANIN_NORMAL);
+        nn.modifyConnections(NeuralNetwork::MS_ZEROBIAS);
+
+        nn.propagate();
+
+        auto activations = nn.getNeuronsPerLayer();
+        auto gradient = nn.getGradientsPerLayer();
+
+        for (size_t il = 0; il < numLayers; ++il)
+        {
+            auto const& l = activations.at(il);
+            for (auto const& a : l)
+            {
+                gsl_histogram_increment(histActivation.at(il), a);
+                // Welford's algorithm (use sigma as M2 storage).
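+                // Single-pass mean/variance update: with c the updated
+                // sample count, mean += (a - mean) / c, and the "sigma"
+                // vector accumulates M2 += (a - mean_old) * (a - mean_new);
+                // the standard deviation is recovered below as
+                // sqrt(M2 / (c - 1)).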
+                countActivation.at(il)++;
+                double const delta = a - meanActivation.at(il);
+                meanActivation.at(il) += delta / countActivation.at(il);
+                double const delta2 = a - meanActivation.at(il);
+                sigmaActivation.at(il) += delta * delta2;
+            }
+            if (il < numLayers - 1)
+            {
+                auto const& l = gradient.at(il);
+                for (auto const& g : l)
+                {
+                    gsl_histogram_increment(histGradient.at(il), g);
+                    countGradient.at(il)++;
+                    double const delta = g - meanGradient.at(il);
+                    meanGradient.at(il) += delta / countGradient.at(il);
+                    double const delta2 = g - meanGradient.at(il);
+                    sigmaGradient.at(il) += delta * delta2;
+                }
+            }
+        }
+    }
+
+    //ofstream f;
+    //f.open("weights.dat");
+    //nn.writeConnections(f);
+    //f.close();
+
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        sigmaActivation.at(i) = sqrt(sigmaActivation.at(i)
+                                   / (countActivation.at(i) - 1));
+        if (i < numLayers - 1)
+        {
+            sigmaGradient.at(i) = sqrt(sigmaGradient.at(i)
+                                     / (countGradient.at(i) - 1));
+        }
+    }
+
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Layer      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12zu", i + 1);
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Activation |";
+    log << strpr(" %12s", "G");
+    for (size_t i = 1; i < numLayers; ++i)
+    {
+        log << strpr(" %12s", stringFromActivation(
+                                  t.activationFunctionsPerLayer[i]).c_str());
+    }
+    log << "\n";
+    log << "Count      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.3E", static_cast<double>(countActivation.at(i)));
+    }
+    log << "\n";
+    log << "Mean       |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.5f", meanActivation.at(i));
+    }
+    log << "\n";
+    log << "Sigma      |";
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+        log << strpr(" %12.5f", sigmaActivation.at(i));
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+    log << "Gradient\n";
+    log << "Count      |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.3E", static_cast<double>(countGradient.at(i)));
+    }
+    log << "\n";
+    log << "Mean       |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.5f", meanGradient.at(i));
+    }
+    log << "\n";
+    log << "Sigma      |";
+    log << strpr(" %12s", "");
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        log << strpr(" %12.5f", sigmaGradient.at(i));
+    }
+    log << "\n";
+    log << "-----------------------------------------"
+           "--------------------------------------\n";
+
+    log << strpr("Writing activation histograms for %zu layers.\n", numLayers);
+    for (size_t i = 0; i < numLayers; ++i)
+    {
+
+        string fileName = strpr("activation.%03zu.out", i + 1);
+        FILE* fp = 0;
+        fp = fopen(fileName.c_str(), "w");
+
+        // File header.
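+        // (The block below follows the usual n2p2 column-file convention:
+        // a title line plus width, name and description for each column,
+        // assembled by createFileHeader() and written by
+        // appendLinesToFile().)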
+        vector<string> title;
+        vector<string> colName;
+        vector<string> colInfo;
+        vector<size_t> colSize;
+        title.push_back(strpr("Activations histogram for layer %zu with "
+                              "activation function %s.",
+                              i + 1,
+                              stringFromActivation(
+                                  t.activationFunctionsPerLayer[i]).c_str()));
+        colSize.push_back(16);
+        colName.push_back("activation_l");
+        colInfo.push_back("Activation value, left bin limit.");
+        colSize.push_back(16);
+        colName.push_back("activation_r");
+        colInfo.push_back("Activation value, right bin limit.");
+        colSize.push_back(16);
+        colName.push_back("hist");
+        colInfo.push_back("Histogram count.");
+        appendLinesToFile(fp,
+                          createFileHeader(title,
+                                           colSize,
+                                           colName,
+                                           colInfo));
+        gsl_histogram_fprintf(fp, histActivation.at(i), "%16.8E", "%16.8E");
+        fflush(fp);
+        fclose(fp);
+        fp = nullptr;
+    }
+
+    log << strpr("Writing gradient histograms for %zu layers.\n",
+                 numLayers - 1);
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+
+        string fileName = strpr("gradient.%03zu.out", i + 2);
+        FILE* fp = 0;
+        fp = fopen(fileName.c_str(), "w");
+
+        // File header.
+        vector<string> title;
+        vector<string> colName;
+        vector<string> colInfo;
+        vector<size_t> colSize;
+        title.push_back(strpr("Gradient histogram for layer %zu.", i + 2));
+        colSize.push_back(16);
+        colName.push_back("gradient_l");
+        colInfo.push_back("Gradient value, left bin limit.");
+        colSize.push_back(16);
+        colName.push_back("gradient_r");
+        colInfo.push_back("Gradient value, right bin limit.");
+        colSize.push_back(16);
+        colName.push_back("hist");
+        colInfo.push_back("Histogram count.");
+        appendLinesToFile(fp,
+                          createFileHeader(title,
+                                           colSize,
+                                           colName,
+                                           colInfo));
+        gsl_histogram_fprintf(fp, histGradient.at(i), "%16.8E", "%16.8E");
+        fflush(fp);
+        fclose(fp);
+        fp = nullptr;
+    }
+
+    log << "Finished.\n";
+    log << "*****************************************"
+           "**************************************\n";
+    logFile.close();
+
+    return 0;
+}
diff --git a/src/application/nnx.cpp b/src/application/nnx.cpp
new file mode 100644
index 000000000..95c765b96
--- /dev/null
+++ b/src/application/nnx.cpp
@@ -0,0 +1,40 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
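+
+// Minimal driver for the experimental Eigen-based NeuralNetworx class: it
+// constructs a small 10-15-15-3 test network from a vector of layer sizes
+// plus a matching vector of activation types and prints the resulting
+// architecture summary.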
+ +#include "NeuralNetworx.h" +#include +#include +#include +#include + +using namespace std; +using namespace nnp; + +int main(int argc, char* argv[]) +{ + vector numNeuronsPerLayer = {10, 15, 15, 3}; + //vector activationPerLayer = {"G", "t", "s", "l"}; + //NeuralNetworx nn(numNeuronsPerLayer, activationPerLayer); + vector activationPerLayer = {NeuralNetworx::Activation::IDENTITY, + NeuralNetworx::Activation::TANH, + NeuralNetworx::Activation::LOGISTIC, + NeuralNetworx::Activation::IDENTITY}; + NeuralNetworx nn(numNeuronsPerLayer, activationPerLayer); + + for (auto l : nn.info()) cout << l; + + return 0; +} diff --git a/src/libnnp/Mode.cpp b/src/libnnp/Mode.cpp index 475ec8ff3..b2d0cd548 100644 --- a/src/libnnp/Mode.cpp +++ b/src/libnnp/Mode.cpp @@ -758,51 +758,7 @@ void Mode::setupNeuralNetwork() { t.numNeuronsPerLayer[i] = atoi(globalNumNeuronsPerHiddenLayer.at(i-1).c_str()); - if (globalActivationFunctions.at(i-1) == "l") - { - a = NeuralNetwork::AF_IDENTITY; - } - else if (globalActivationFunctions.at(i-1) == "t") - { - a = NeuralNetwork::AF_TANH; - } - else if (globalActivationFunctions.at(i-1) == "s") - { - a = NeuralNetwork::AF_LOGISTIC; - } - else if (globalActivationFunctions.at(i-1) == "p") - { - a = NeuralNetwork::AF_SOFTPLUS; - } - else if (globalActivationFunctions.at(i-1) == "r") - { - a = NeuralNetwork::AF_RELU; - } - else if (globalActivationFunctions.at(i-1) == "g") - { - a = NeuralNetwork::AF_GAUSSIAN; - } - else if (globalActivationFunctions.at(i-1) == "c") - { - a = NeuralNetwork::AF_COS; - } - else if (globalActivationFunctions.at(i-1) == "S") - { - a = NeuralNetwork::AF_REVLOGISTIC; - } - else if (globalActivationFunctions.at(i-1) == "e") - { - a = NeuralNetwork::AF_EXP; - } - else if (globalActivationFunctions.at(i-1) == "h") - { - a = NeuralNetwork::AF_HARMONIC; - } - else - { - throw runtime_error("ERROR: Unknown activation " - "function.\n"); - } + a = activationFromString(globalActivationFunctions.at(i-1)); } } } @@ -884,7 +840,9 @@ void Mode::setupNeuralNetworkWeights(string const& fileNameFormat) weights.push_back(atof(splitLine.at(0).c_str())); } } - it->neuralNetwork->setConnections(&(weights.front())); + // Attention: need alternative (old) ordering scheme here for + // backward compatibility! 
+        it->neuralNetwork->setConnectionsAO(&(weights.front()));

         file.close();
     }
diff --git a/src/libnnp/NeuralNetwork.cpp b/src/libnnp/NeuralNetwork.cpp
index e7b7808c6..6271bcf94 100644
--- a/src/libnnp/NeuralNetwork.cpp
+++ b/src/libnnp/NeuralNetwork.cpp
@@ -80,12 +80,26 @@ NeuralNetwork(int numLayers,
         weightOffset[i] = weightOffset[i-1]
                         + (layers[i-1].numNeurons + 1) * layers[i].numNeurons;
     }
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    biasOffset = new int*[numLayers-1];
+    for (int i = 0; i < numLayers-1; i++)
+    {
+        biasOffset[i] = new int[layers[i+1].numNeurons];
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            biasOffset[i][j] = weightOffset[i]
+                             + (layers[i+1].numNeuronsPrevLayer + 1)
+                             * (j + 1) - 1;
+        }
+    }
+#else
     biasOffset = new int[numLayers-1];
     for (int i = 0; i < numLayers-1; i++)
     {
         biasOffset[i] = weightOffset[i]
                       + layers[i+1].numNeurons * layers[i].numNeurons;
     }
+#endif
     biasOnlyOffset = new int[numLayers-1];
     biasOnlyOffset[0] = 0;
     for (int i = 1; i < numLayers-1; i++)
@@ -106,7 +120,15 @@ NeuralNetwork::~NeuralNetwork()
     }
     delete[] layers;
     delete[] weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        delete[] biasOffset[i];
+    }
+    delete[] biasOffset;
+#else
     delete[] biasOffset;
+#endif
     delete[] biasOnlyOffset;
 }
@@ -148,6 +170,27 @@ void NeuralNetwork::setConnections(double const* const& connections)
 {
     int count = 0;

+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                layers[i].neurons[j].weights[k] = connections[count];
+                count++;
+            }
+            layers[i].neurons[j].bias = connections[count];
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::setConnectionsAO(double const* const& connections)
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -172,6 +215,27 @@ void NeuralNetwork::getConnections(double* connections) const
 {
     int count = 0;

+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                connections[count] = layers[i].neurons[j].weights[k];
+                count++;
+            }
+            connections[count] = layers[i].neurons[j].bias;
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::getConnectionsAO(double* connections) const
+{
+    int count = 0;
+
     for (int i = 1; i < numLayers; i++)
     {
         for (int j = 0; j < layers[i].numNeuronsPrevLayer; j++)
@@ -231,36 +295,119 @@ void NeuralNetwork::modifyConnections(ModificationScheme modificationScheme)
             }
         }
     }
-    else if (modificationScheme == MS_FANIN)
+    else if (modificationScheme == MS_FANIN_UNIFORM)
     {
         for (int i = 1; i < numLayers; i++)
         {
-            if(layers[i].activationFunction == AF_TANH)
+            for (int j = 0; j < layers[i].numNeurons; j++)
             {
-                for (int j = 0; j < layers[i].numNeurons; j++)
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
                 {
-                    for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                    if(layers[i].activationFunction == AF_TANH)
                     {
-                        layers[i].neurons[j].weights[k] /=
-                            sqrt(layers[i].numNeuronsPrevLayer);
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(3.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(3.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(6.0 / layers[i].numNeuronsPrevLayer);
                     }
                 }
             }
        }
     }
-    else if (modificationScheme == MS_GLOROTBENGIO)
+    else if (modificationScheme == MS_FANIN_NORMAL)
     {
         for (int i = 1; i < numLayers; i++)
         {
-            if(layers[i].activationFunction == AF_TANH)
+            for (int j = 0; j < layers[i].numNeurons; j++)
             {
-                for (int j = 0; j < layers[i].numNeurons; j++)
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
                 {
-                    for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(1.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(1.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
                     {
-                        layers[i].neurons[j].weights[k] *= sqrt(6.0 / (
-                            layers[i].numNeuronsPrevLayer
-                            + layers[i].numNeurons));
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(2.0 / layers[i].numNeuronsPrevLayer);
+                    }
+                }
+            }
+        }
+    }
+    else if (modificationScheme == MS_GLOROTBENGIO_UNIFORM)
+    {
+        for (int i = 1; i < numLayers; i++)
+        {
+            for (int j = 0; j < layers[i].numNeurons; j++)
+            {
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                {
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(6.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(6.0 / (layers[i].numNeuronsPrevLayer
+                                            + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(12.0 / (layers[i].numNeuronsPrevLayer
+                                       + layers[i].numNeurons));
+                    }
+                }
+            }
+        }
+    }
+    else if (modificationScheme == MS_GLOROTBENGIO_NORMAL)
+    {
+        for (int i = 1; i < numLayers; i++)
+        {
+            for (int j = 0; j < layers[i].numNeurons; j++)
+            {
+                for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+                {
+                    if(layers[i].activationFunction == AF_TANH)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(2.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_LOGISTIC)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            4.0 * sqrt(2.0 / (layers[i].numNeuronsPrevLayer
+                                            + layers[i].numNeurons));
+                    }
+                    else if(layers[i].activationFunction == AF_RELU ||
+                            layers[i].activationFunction == AF_SOFTPLUS)
+                    {
+                        layers[i].neurons[j].weights[k] *=
+                            sqrt(4.0 / (layers[i].numNeuronsPrevLayer
+                                      + layers[i].numNeurons));
                     }
                 }
             }
@@ -368,6 +515,42 @@ void NeuralNetwork::getOutput(double* output) const
     return;
 }

+vector<vector<double>> NeuralNetwork::getNeuronsPerLayer() const
+{
+    vector<vector<double>> output;
+
+    for (int i = 0; i < numLayers; ++i)
+    {
+        output.push_back(vector<double>());
+        for (int j = 0; j < layers[i].numNeurons; ++j)
+        {
+            output.back().push_back(layers[i].neurons[j].value);
+        }
+    }
+
+    return output;
+}
+
+vector<vector<double>> NeuralNetwork::getGradientsPerLayer() const
+{
+    vector<vector<double>> output;
+
+    double* dEdc = new double[getNumConnections()];
+    calculateDEdc(dEdc);
+
+    for (int i = 0; i < numLayers - 2; ++i)
+    {
+        output.push_back(vector<double>(dEdc + weightOffset[i],
+                                        dEdc + weightOffset[i + 1]));
+    }
+    output.push_back(vector<double>(dEdc + weightOffset[numLayers - 2],
+                                    dEdc + getNumConnections()));
+
+    delete[] dEdc;
+
+    return output;
+}
+
 void NeuralNetwork::propagate()
 {
     for (int i = 1; i < numLayers; i++)
@@ -428,6 +611,52 @@ void NeuralNetwork::calculateDEdG(double *dEdG) const

 void NeuralNetwork::calculateDEdc(double* dEdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < numConnections; i++)
+    {
+        dEdc[i] = 0.0;
+    }
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        dEdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].dfdx;
+        if (normalizeNeurons)
+        {
+            dEdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers - 2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[weightOffset[i]+count] = dEdc[biasOffset[i][j]]
+                                            * layers[i].neurons[k].value;
+                count++;
+                if (i >= 1)
+                {
+                    dEdc[biasOffset[i-1][k]] += dEdc[biasOffset[i][j]]
+                        * layers[i+1].neurons[j].weights[k]
+                        * layers[i].neurons[k].dfdx;
+                }
+            }
+            count++;
+        }
+        if (normalizeNeurons && i >= 1)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                dEdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;

     for (int i = 0; i < numConnections; i++)
@@ -468,6 +697,7 @@ void NeuralNetwork::calculateDEdc(double* dEdc) const
             }
         }
     }
+#endif

     return;
 }
@@ -512,6 +742,67 @@ void NeuralNetwork::calculateDFdc(double* dFdc,
 }

 void NeuralNetwork::writeConnections(std::ofstream& file) const
+{
+    // File header.
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    int count = 0;
+    for (int i = 1; i < numLayers; i++)
+    {
+        for (int j = 0; j < layers[i].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeuronsPrevLayer; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              layers[i].neurons[j].weights[k],
+                              count,
+                              i - 1,
+                              k + 1,
+                              i,
+                              j + 1);
+            }
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          layers[i].neurons[j].bias,
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetwork::writeConnectionsAO(std::ofstream& file) const
 {
     // File header.
     vector<string> title;
@@ -645,6 +936,67 @@ void NeuralNetwork::calculateD2EdGdc(int index,
                                      double const* const& dEdb,
                                      double* d2EdGdc) const
 {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    int count = 0;
+
+    for (int i = 0; i < outputLayer->numNeurons; i++)
+    {
+        d2EdGdc[biasOffset[numLayers-2][i]] = outputLayer->neurons[i].d2fdx2
+                                            * outputLayer->neurons[i].dxdG;
+        if (normalizeNeurons)
+        {
+            d2EdGdc[biasOffset[numLayers-2][i]] /=
+                outputLayer->numNeuronsPrevLayer;
+        }
+    }
+
+    for (int i = numLayers-2; i >= 0; i--)
+    {
+        count = 0;
+        for (int j = 0; j < layers[i+1].numNeurons; j++)
+        {
+            for (int k = 0; k < layers[i].numNeurons; k++)
+            {
+                if (i == 0)
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                                                   * layers[i].neurons[k].value;
+                    if (k == index)
+                    {
+                        d2EdGdc[weightOffset[i]+count] +=
+                            dEdb[biasOnlyOffset[i]+j];
+                    }
+                }
+                else
+                {
+                    d2EdGdc[weightOffset[i]+count] = d2EdGdc[biasOffset[i][j]]
+                        * layers[i].neurons[k].value
+                        + dEdb[biasOnlyOffset[i]+j] * layers[i].neurons[k].dfdx
+                        * layers[i].neurons[k].dxdG;
+                }
+                count++;
+                if (i >= 1)
+                {
+                    d2EdGdc[biasOffset[i-1][k]] +=
+                        layers[i+1].neurons[j].weights[k]
+                        * (d2EdGdc[biasOffset[i][j]]
+                           * layers[i].neurons[k].dfdx
+                           + dEdb[biasOnlyOffset[i]+j]
+                           * layers[i].neurons[k].d2fdx2
+                           * layers[i].neurons[k].dxdG);
+                }
+            }
+            count++;
+        }
+        for (int k = 0; k < layers[i].numNeurons; k++)
+        {
+            if (normalizeNeurons && i >= 1)
+            {
+                d2EdGdc[biasOffset[i-1][k]] /= layers[i].numNeuronsPrevLayer;
+            }
+        }
+    }
+#else
     int count = 0;

     for (int i = 0; i < outputLayer->numNeurons; i++)
@@ -699,6 +1051,7 @@ void NeuralNetwork::calculateD2EdGdc(int index,
             }
         }
     }
+#endif

     return;
 }
@@ -921,6 +1274,45 @@ void NeuralNetwork::getNeuronStatistics(long* count,
     return;
 }

+vector<pair<size_t, size_t>> NeuralNetwork::getLayerLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (int i = 0; i < numLayers - 2; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   weightOffset[i + 1] - 1));
+    }
+    limits.push_back(make_pair(weightOffset[numLayers - 2],
+                               numConnections - 1));
+
+    return limits;
+}
+
+vector<pair<size_t, size_t>> NeuralNetwork::getNeuronLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    for (int i = 0; i < numLayers - 1; ++i)
+    {
+        limits.push_back(make_pair(weightOffset[i],
+                                   biasOffset[i][0]));
+        for (int j = 0; j < layers[i+1].numNeurons - 1; ++j)
+        {
+            limits.push_back(make_pair(biasOffset[i][j] + 1,
+                                       biasOffset[i][j+1]));
+        }
+    }
+#else
+    throw runtime_error("ERROR: Neuron boundaries not available with "
+                        "alternative (old) weight memory layout, recompile "
+                        "without -DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+
+    return limits;
+}
+
 /*
 void NeuralNetwork::writeStatus(int element, int epoch)
 {
@@ -969,7 +1361,15 @@ long NeuralNetwork::getMemoryUsage()
     int numNeurons = getNumNeurons();

     mem += (numLayers - 1) * sizeof(int);  // weightOffset
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    mem += (numLayers - 1) * sizeof(int*); // biasOffset
+    for (int i = 0; i < numLayers - 1; i++)
+    {
+        mem += layers[i].numNeurons * sizeof(int);
+    }
+#else
     mem += (numLayers - 1) * sizeof(int);  // biasOffset
+#endif
     mem += (numLayers - 1) * sizeof(int);  // biasOnlyOffset
     mem += numLayers * sizeof(Layer);      // layers
     mem += numNeurons * sizeof(Neuron);    // neurons
@@ -1008,45 +1408,11 @@ vector<string> NeuralNetwork::info() const
         {
             s += strpr(" %3s", "G");
         }
-        else if (layers[j].activationFunction == AF_IDENTITY)
-        {
-            s += strpr(" %3s", "l");
-        }
-        else if (layers[j].activationFunction == AF_TANH)
-        {
-            s += strpr(" %3s", "t");
-        }
-        else if (layers[j].activationFunction == AF_LOGISTIC)
-        {
%3s", "s"); - } - else if (layers[j].activationFunction == AF_SOFTPLUS) - { - s += strpr(" %3s", "p"); - } - else if (layers[j].activationFunction == AF_RELU) - { - s += strpr(" %3s", "r"); - } - else if (layers[j].activationFunction == AF_GAUSSIAN) - { - s += strpr(" %3s", "g"); - } - else if (layers[j].activationFunction == AF_COS) - { - s += strpr(" %3s", "c"); - } - else if (layers[j].activationFunction == AF_REVLOGISTIC) - { - s += strpr(" %3s", "S"); - } - else if (layers[j].activationFunction == AF_EXP) - { - s += strpr(" %3s", "e"); - } - else if (layers[j].activationFunction == AF_HARMONIC) + else { - s += strpr(" %3s", "h"); + s += strpr(" %3s", + stringFromActivation( + layers[j].activationFunction).c_str()); } } else @@ -1059,3 +1425,47 @@ vector NeuralNetwork::info() const return v; } + +string nnp::stringFromActivation(NeuralNetwork::ActivationFunction af) +{ + string c = ""; + + if (af == NeuralNetwork::AF_IDENTITY) c = "l"; + else if (af == NeuralNetwork::AF_TANH) c = "t"; + else if (af == NeuralNetwork::AF_LOGISTIC) c = "s"; + else if (af == NeuralNetwork::AF_SOFTPLUS) c = "p"; + else if (af == NeuralNetwork::AF_RELU) c = "r"; + else if (af == NeuralNetwork::AF_GAUSSIAN) c = "g"; + else if (af == NeuralNetwork::AF_COS) c = "c"; + else if (af == NeuralNetwork::AF_REVLOGISTIC) c = "S"; + else if (af == NeuralNetwork::AF_EXP) c = "e"; + else if (af == NeuralNetwork::AF_HARMONIC) c = "h"; + else + { + throw runtime_error("ERROR: Unknown activation function.\n"); + } + + return c; +} + +NeuralNetwork::ActivationFunction nnp::activationFromString(string c) +{ + NeuralNetwork::ActivationFunction af; + + if (c == "l") af = NeuralNetwork::AF_IDENTITY; + else if (c == "t") af = NeuralNetwork::AF_TANH; + else if (c == "s") af = NeuralNetwork::AF_LOGISTIC; + else if (c == "p") af = NeuralNetwork::AF_SOFTPLUS; + else if (c == "r") af = NeuralNetwork::AF_RELU; + else if (c == "g") af = NeuralNetwork::AF_GAUSSIAN; + else if (c == "c") af = NeuralNetwork::AF_COS; + else if (c == "S") af = NeuralNetwork::AF_REVLOGISTIC; + else if (c == "e") af = NeuralNetwork::AF_EXP; + else if (c == "h") af = NeuralNetwork::AF_HARMONIC; + else + { + throw runtime_error("ERROR: Unknown activation function.\n"); + } + + return af; +} diff --git a/src/libnnp/NeuralNetwork.h b/src/libnnp/NeuralNetwork.h index c56c58e9f..842a4ff80 100644 --- a/src/libnnp/NeuralNetwork.h +++ b/src/libnnp/NeuralNetwork.h @@ -17,8 +17,10 @@ #ifndef NEURALNETWORK_H #define NEURALNETWORK_H +#include // std::size_t #include // std::ofstream #include // std::string +#include // std::pair #include // std::vector namespace nnp @@ -60,32 +62,64 @@ class NeuralNetwork MS_ZEROBIAS, /// Set all weights connecting to the output layer to zero. MS_ZEROOUTPUTWEIGHTS, + /** Normalize weights via number of neuron inputs (fan-in). + * + * If initial weights are uniformly distributed in + * @f$\left[-1, 1\right]@f$ they will be scaled to be in + * @f$\left[-\sqrt{\frac{3}{n_\text{in}}}, + * \sqrt{\frac{3}{n_\text{in}}}\right]@f$, where @f$n_\text{in}@f$ is + * the number of incoming weights of a neuron. The interval is + * valid for #AF_TANH and is slightly modified for #AF_LOGISTIC, + * #AF_RELU and #AF_SOFTPLUS. Weights are not modified for other + * activation functions. + */ + MS_FANIN_UNIFORM, /** Normalize weights via number of neuron inputs (fan-in). 
          *
          * If initial weights are uniformly distributed in
          * @f$\left[-1, 1\right]@f$ they will be scaled to be in
          * @f$\left[\frac{-1}{\sqrt{n_\text{in}}},
          * \frac{1}{\sqrt{n_\text{in}}}\right]@f$, where @f$n_\text{in}@f$ is
-         * the number of incoming weights of a neuron (if activation
-         * function is of type #AF_TANH).
+         * the number of incoming weights of a neuron. The interval is
+         * valid for #AF_TANH and is slightly modified for #AF_LOGISTIC,
+         * #AF_RELU and #AF_SOFTPLUS. Weights are not modified for other
+         * activation functions.
          */
-        MS_FANIN,
+        MS_FANIN_NORMAL,
         /** Normalize connections according to Glorot and Bengio.
          *
          * If initial weights are uniformly distributed in
          * @f$\left[-1, 1\right]@f$ they will be scaled to be in
          * @f$\left[-\sqrt{\frac{6}{n_\text{in} + n_\text{out}}},
          * \sqrt{\frac{6}{n_\text{in} + n_\text{out}}}\right]@f$, where
-         * @f$n_\text{in}@f$ and @f$n_\text{out}@f$ are the number of incoming
-         * and outgoing weights of a neuron, respectively (if activation
-         * function is of type #AF_TANH).
+         * @f$n_\text{in}@f$ is the number of incoming weights and
+         * @f$n_\text{out}@f$ is the number of neurons in this layer.
+         * The interval is valid for #AF_TANH and is slightly modified for
+         * #AF_LOGISTIC, #AF_RELU and #AF_SOFTPLUS.
          *
          * For details see:
          *  - X. Glorot and Y. Bengio, "Understanding the difficulty of
          *    training deep feedforward neural networks", International
          *    conference on artificial intelligence and statistics. 2010.
          */
-        MS_GLOROTBENGIO,
+        MS_GLOROTBENGIO_UNIFORM,
+        /** Normalize connections according to Glorot and Bengio.
+         *
+         * If initial weights are normally distributed with zero mean and
+         * unit standard deviation they will be scaled by the factor
+         * @f$\sqrt{\frac{2}{n_\text{in} + n_\text{out}}}@f$, where
+         * @f$n_\text{in}@f$ is the number of incoming weights and
+         * @f$n_\text{out}@f$ is the number of neurons in this layer.
+         * The factor is valid for #AF_TANH and is slightly modified for
+         * #AF_LOGISTIC, #AF_RELU and #AF_SOFTPLUS.
+         *
+         * For details see:
+         *  - X. Glorot and Y. Bengio, "Understanding the difficulty of
+         *    training deep feedforward neural networks", International
+         *    conference on artificial intelligence and statistics. 2010.
+         */
+        MS_GLOROTBENGIO_NORMAL,
         /** Initialize connections according to Nguyen-Widrow scheme.
          *
          * For details see:
@@ -152,29 +186,70 @@ class NeuralNetwork
      * @f[
      * \underbrace{
      * \overbrace{
-     * a^{01}_{00}, \ldots, a^{01}_{0m_1},
-     * a^{01}_{10}, \ldots, a^{01}_{1m_1},
+     * a^{01}_{00}, \ldots, a^{01}_{n_0 0}, b^{1}_{0}
+     * }^{\text{Neuron}^{1}_{0}},
+     * \overbrace{
+     * a^{01}_{01}, \ldots, a^{01}_{n_0 1}, b^{1}_{1}
+     * }^{\text{Neuron}^{1}_{1}},
+     * \ldots,
+     * \overbrace{
+     * a^{01}_{0 n_1}, \ldots, a^{01}_{n_0 n_1}, b^{1}_{n_1}
+     * }^{\text{Neuron}^{1}_{n_1}}
+     * }_{\text{Layer } 0 \rightarrow 1},
+     * \underbrace{
+     * a^{12}_{00}, \ldots, b^{2}_{n_2}
+     * }_{\text{Layer } 1 \rightarrow 2},
+     * \ldots,
+     * \underbrace{
+     * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     * }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
+     * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
+     * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
+     * @f$k@f$ in layer @f$i@f$.
+     *
+     * This ordering scheme is used internally to store weights in memory.
+     * Because all weights and biases connected to a neuron are aligned in a
+     * contiguous block this memory layout simplifies the implementation of
+     * node-decoupled Kalman filter training (NDEKF). However, for backward
+     * compatibility reasons this layout is NOT used for storing weights on
+     * disk (see setConnectionsAO()).
+     */
+    void                     setConnections(double const* const& connections);
+    /** Set neural network weights and biases (alternative ordering).
+     *
+     * @param[in] connections One-dimensional array with neural network
+     *                        connections in the following order:
+     * @f[
+     * \underbrace{
+     * \overbrace{
+     * a^{01}_{00}, \ldots, a^{01}_{0 n_1},
+     * a^{01}_{10}, \ldots, a^{01}_{1 n_1},
      * \ldots,
-     * a^{01}_{n_00}, \ldots, a^{01}_{n_0m_1},
+     * a^{01}_{n_0 0}, \ldots, a^{01}_{n_0 n_1},
      * }^{\text{Weights}}
      * \overbrace{
-     * b^{1}_{0}, \ldots, b^{1}_{m_1}
+     * b^{1}_{0}, \ldots, b^{1}_{n_1}
      * }^{\text{Biases}}
      * }_{\text{Layer } 0 \rightarrow 1},
      * \underbrace{
-     * a^{12}_{00}, \ldots, b^{2}_{m_2}
+     * a^{12}_{00}, \ldots, b^{2}_{n_2}
      * }_{\text{Layer } 1 \rightarrow 2},
      * \ldots,
      * \underbrace{
-     * a^{p-1,p}_{00}, \ldots, b^{p}_{m_p}
+     * a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
      * }_{\text{Layer } p-1 \rightarrow p}
      * @f]
      * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron
      * @f$j@f$ in layer @f$i-1@f$ to neuron @f$k@f$ in layer
      * @f$i@f$ and @f$b^{i}_{k}@f$ is the bias assigned to neuron
      * @f$k@f$ in layer @f$i@f$.
+     *
+     * This memory layout is used when storing weights on disk.
      */
-    void                     setConnections(double const* const& connections);
+    void                     setConnectionsAO(
+                                       double const* const& connections);
     /** Get neural network weights and biases.
      *
      * @param[out] connections One-dimensional array with neural network
      *                         connections (same order as described in
      *                         #setConnections())
      */
     void                     getConnections(double* connections) const;
+    /** Get neural network weights and biases (alternative ordering).
+     *
+     * @param[out] connections One-dimensional array with neural network
+     *                         connections (same order as described in
+     *                         #setConnectionsAO())
+     */
+    void                     getConnectionsAO(double* connections) const;
     /** Initialize connections with random numbers.
      *
      * @param[in] seed Random number generator seed.
@@ -211,16 +293,28 @@ class NeuralNetwork
                                 ModificationScheme modificationScheme,
                                 double             parameter1,
                                 double             parameter2);
-    /** Set neural network input layer node values.
+    /** Set neural network input layer neuron values.
      *
-     * @param[in] input Input layer node values.
+     * @param[in] input Input layer neuron values.
      */
     void                     setInput(double const* const& input) const;
-    /** Get neural network output layer node values.
+    /** Get neural network output layer neuron values.
      *
-     * @param[out] output Output layer node values.
+     * @param[out] output Output layer neuron values.
      */
     void                     getOutput(double* output) const;
+    /** Get neuron values (activations) for all layers.
+     *
+     * @return Vector of neuron values per layer.
+     */
+    std::vector<
+    std::vector<double>>     getNeuronsPerLayer() const;
+    /** Get gradients (derivatives of output w.r.t. weights) for all layers.
+     *
+     * @return Vector of gradients per layer.
+     */
+    std::vector<
+    std::vector<double>>     getGradientsPerLayer() const;
     /** Propagate input information through all layers.
      *
      * With the input data set by #setInput() this will calculate all remaining
@@ -295,10 +389,20 @@ class NeuralNetwork
     void                     calculateDFdc(double*              dFdc,
                                            double const* const& dGdxyz) const;
     /** Write connections to file.
+     *
+     * __CAUTION__: For compatibility reasons this format is NOT used for
+     * storing NN weights to disk!
      *
      * @param[in,out] file File stream to write to.
      */
     void                     writeConnections(std::ofstream& file) const;
+    /** Write connections to file (alternative ordering).
+     *
+     * This NN weights ordering layout is used when writing weights to disk.
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void                     writeConnectionsAO(std::ofstream& file) const;
     /** Return gathered neuron statistics.
      *
      * @param[out] count Number of neuron output value calculations.
@@ -333,6 +437,24 @@ class NeuralNetwork
      * Counters and summation variables for neuron statistics are reset.
      */
     void                     resetNeuronStatistics();
+    /** Get layer limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with layer limits as pair (begin, end),
+     *         (numLayers - 1) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getLayerLimits() const;
+    /** Get neuron limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with neuron limits as pair (begin, end),
+     *         (numNeurons - inputLayer->numNeurons) points.
+     */
+    std::vector<std::pair<std::size_t, std::size_t>>
+                             getNeuronLimits() const;
     //void writeStatus(int, int);
     long                     getMemoryUsage();
     /** Print neural network architecture.
@@ -404,8 +526,13 @@ class NeuralNetwork
     int                      numHiddenLayers;
     /// Offset adress of weights per layer in combined weights+bias array.
     int*                     weightOffset;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+    /// Offset address of each neuron per layer in combined weights+bias array.
+    int**                    biasOffset;
+#else
     /// Offset adress of biases per layer in combined weights+bias array.
     int*                     biasOffset;
+#endif
     /// Offset adress of biases per layer in bias only array.
     int*                     biasOnlyOffset;
     /// Pointer to input layer.
@@ -480,6 +607,30 @@ class NeuralNetwork
     void propagateLayer(Layer& layer, Layer& layerPrev);
 };

+/** Convert string to activation function.
+ *
+ * @param[in] c Single character representing activation function.
+ *
+ * @return ActivationFunction corresponding to string.
+ */
+NeuralNetwork::ActivationFunction activationFromString(std::string c);
+/** Convert activation function to string.
+ *
+ * @param[in] af Input ActivationFunction type.
+ *
+ * @return Single char representing activation function.
+ */
+std::string stringFromActivation(NeuralNetwork::ActivationFunction af);
+
+/// Simple class for managing neural network settings.
+struct NeuralNetworkTopology
+{
+    int                                numLayers = 0;
+    std::vector<int>                   numNeuronsPerLayer;
+    std::vector<
+    NeuralNetwork::ActivationFunction> activationFunctionsPerLayer;
+};
+
 }

 #endif
diff --git a/src/libnnp/NeuralNetworx.cpp b/src/libnnp/NeuralNetworx.cpp
new file mode 100644
index 000000000..2c8d4e539
--- /dev/null
+++ b/src/libnnp/NeuralNetworx.cpp
@@ -0,0 +1,725 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
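+
+// Experimental Eigen-based reimplementation of the feed-forward network:
+// each Layer owns its weight matrix w, bias vector b and neuron state
+// vectors, so forward propagation reduces to x = w * y_prev + b followed
+// by an element-wise activation (see Layer::propagate() below).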
+ +#include "NeuralNetworx.h" +#include "utility.h" +#include // std::min, std::max +#include +#include + +#define EXP_LIMIT 35.0 + +using namespace std; +using namespace Eigen; +using namespace nnp; + +NeuralNetworx::NeuralNetworx(vector numNeuronsPerLayer, + vector activationPerLayer) : + numLayers (0), + numConnections (0), + numWeights (0), + numBiases (0) +{ + initialize(numNeuronsPerLayer, activationPerLayer); +} + +NeuralNetworx::NeuralNetworx(vector numNeuronsPerLayer, + vector activationStringPerLayer) : + numLayers (0), + numConnections (0), + numWeights (0), + numBiases (0) +{ + // Set irrelevant input layer activation to linear. + activationStringPerLayer.at(0) = "l"; + vector activationPerLayer; + for (auto a : activationStringPerLayer) + { + activationPerLayer.push_back(activationFromString(a)); + } + + initialize(numNeuronsPerLayer, activationPerLayer); +} + +NeuralNetworx::Layer::Layer(size_t numNeurons, + size_t numNeuronsPrevLayer, + Activation activation) : + numNeurons (numNeurons), + numNeuronsPrevLayer(numNeuronsPrevLayer), + fa (activation) +{ + // Input layer has no previous layer. + if (numNeuronsPrevLayer > 0) + { + w.resize(numNeurons, numNeuronsPrevLayer); + b.resize(numNeurons); + x.resize(numNeurons); + dydx.resize(numNeurons); + d2ydx2.resize(numNeurons); + } + y.resize(numNeurons); +} + +void NeuralNetworx::Layer::propagate(VectorXd const& yp) +{ + x = w * yp + b; + + if (fa == Activation::IDENTITY) + { + y = x; + dydx.setOnes(); + d2ydx2.setZero(); + } + else if (fa == Activation::TANH) + { + y = x.array().tanh(); + dydx = 1.0 - y.array() * y.array(); + d2ydx2 = -2.0 * y.array() * dydx.array(); + } + else if (fa == Activation::LOGISTIC) + { + y = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 1.0; + else if (xi < -EXP_LIMIT) return 0.0; + else return 1.0 / (1.0 + exp(-xi)); + }); + //dydx = (y.array() <= 0.0 || y.array() >= 1.0).select( + // 0.0, + // y.array() * (1.0 - y.array())); + dydx = y.array().unaryExpr( + [](double yi) + { + if (yi <= 0.0 || yi >= 1.0) return 0.0; + else return yi * (1.0 - yi); + }); + //d2ydx2 = (y.array() <= 0.0 || y.array() >= 1.0).select( + // 0.0, + // y.array() * (1.0 - y.array()) * (1.0 - 2.0 * y.array())); + d2ydx2 = y.array().unaryExpr( + [](double yi) + { + if (yi <= 0.0 || yi >= 1.0) return 0.0; + else return yi * (1.0 - yi) * (1.0 - 2.0 * yi); + ; + }); + } + else if (fa == Activation::SOFTPLUS) + { + y = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return xi; + else if (xi < -EXP_LIMIT) return 0.0; + else return log(1.0 + exp(xi)); + }); + dydx = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 1.0; + else if (xi < -EXP_LIMIT) return 0.0; + else return 1.0 / (1.0 + exp(-xi)); + }); + d2ydx2 = x.array().unaryExpr( + [](double xi) + { + if (xi > EXP_LIMIT) return 0.0; + else if (xi < -EXP_LIMIT) return 0.0; + else + { + double const tmp = 1.0 / (1.0 + exp(-xi)); + return tmp * (1.0 - tmp); + } + ; + }); + } + else if (fa == Activation::RELU) + { + //y = (x.array() > 0.0).select(x, 0.0); + y = x.array().unaryExpr( + [](double xi) + { + if (xi > 0.0) return xi; + else return 0.0; + }); + //dydx = (x.array() > 0.0).select(VectorXd::Ones(dydx.size()) 0.0); + dydx = x.array().unaryExpr( + [](double xi) + { + if (xi > 0.0) return 1.0; + else return 0.0; + }); + d2ydx2.setZero(); + } + else if (fa == Activation::GAUSSIAN) + { + y = (-0.5 * x.array() * x.array()).exp(); + dydx = -x.array() * y.array(); + d2ydx2 = (x.array() * x.array() - 1.0) * y.array(); + } + else 
+    else if (fa == Activation::COS)
+    {
+        y = x.array().cos();
+        dydx = -x.array().sin();
+        d2ydx2 = -y.array();
+    }
+    else if (fa == Activation::REVLOGISTIC)
+    {
+        VectorXd const tmp = 1.0 / (1.0 + (-x.array()).exp());
+        y = 1.0 - tmp.array();
+        dydx = tmp.array() * (tmp.array() - 1.0);
+        d2ydx2 = tmp.array() * (tmp.array() - 1.0) * (1.0 - 2.0 * tmp.array());
+    }
+    else if (fa == Activation::EXP)
+    {
+        y = (-x.array()).exp();
+        dydx = -y.array();
+        d2ydx2 = y.array();
+    }
+    else if (fa == Activation::HARMONIC)
+    {
+        y = x.array() * x.array();
+        dydx = 2.0 * x.array();
+        d2ydx2.setConstant(2.0);
+    }
+
+    return;
+}
+
+void NeuralNetworx::Layer::initializeDerivInput()
+{
+    // Multiply each row of weights (i.e. all weights connected to a neuron
+    // of this layer) with derivative of the activation function.
+    dydyi = dydx.asDiagonal() * w;
+
+    return;
+}
+
+void NeuralNetworx::Layer::propagateDerivInput(MatrixXd const& dypdyi)
+{
+    dydyi = dydx.asDiagonal() * (w * dypdyi);
+
+    return;
+}
+
+void NeuralNetworx::setInput(vector<double> const& input)
+{
+    if (input.size() != layers.at(0).numNeurons)
+    {
+        throw runtime_error("ERROR: Input vector size does not match number "
+                            "of input layer neurons.");
+    }
+    Map<VectorXd const> y0(input.data(), input.size());
+    layers.at(0).y = y0;
+
+    return;
+}
+
+void NeuralNetworx::propagate(bool deriv)
+{
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        layers.at(i).propagate(layers.at(i - 1).y);
+        if (deriv)
+        {
+            if (i == 1) layers.at(i).initializeDerivInput();
+            else layers.at(i).propagateDerivInput(layers.at(i - 1).dydyi);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::propagate(vector<double> const& input, bool deriv)
+{
+    setInput(input);
+    propagate(deriv);
+
+    return;
+}
+
+void NeuralNetworx::getOutput(vector<double>& output) const
+{
+    output.clear();
+    output.insert(output.begin(),
+                  layers.back().y.data(),
+                  layers.back().y.data() + layers.back().y.size());
+
+    return;
+}
+
+void NeuralNetworx::getDerivInput(vector<vector<double>>& derivInput) const
+{
+    derivInput.clear();
+    derivInput.resize(layers.back().dydyi.rows());
+    for (size_t i = 0; i < derivInput.size(); ++i)
+    {
+        for (size_t j = 0; j < layers.at(0).numNeurons; ++j)
+        {
+            derivInput.at(i).push_back(layers.back().dydyi(i, j));
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::setConnections(vector<double> const& connections)
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd const> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.cols();
+        for (Index i = 0; i < l->y.size(); ++i)
+        {
+            l->w.row(i) = c.segment(count, n);
+            count += n;
+            l->b(i) = c(count);
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::setConnectionsAO(vector<double> const& connections)
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd const> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.rows();
+        for (Index i = 0; i < l->w.cols(); ++i)
+        {
+            l->w.col(i) = c.segment(count, n);
+            count += n;
+        }
+        l->b = c.segment(count, l->b.size());
+        count += l->b.size();
+    }
+
+    return;
+}
+
+void NeuralNetworx::getConnections(vector<double>& connections) const
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::const_iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.cols();
+        for (Index i = 0; i < l->y.size(); ++i)
+        {
+            c.segment(count, n) = l->w.row(i);
+            count += n;
+            c(count) = l->b(i);
+            count++;
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::getConnectionsAO(vector<double>& connections) const
+{
+    if (connections.size() != numConnections)
+    {
+        throw runtime_error("ERROR: Provided vector has incorrect length.\n");
+    }
+
+    Map<VectorXd> c(connections.data(), connections.size());
+
+    size_t count = 0;
+    for (vector<Layer>::const_iterator l = layers.begin() + 1;
+         l != layers.end(); ++l)
+    {
+        size_t const n = l->w.rows();
+        for (Index i = 0; i < l->w.cols(); ++i)
+        {
+            c.segment(count, n) = l->w.col(i);
+            count += n;
+        }
+        c.segment(count, l->b.size()) = l->b;
+        count += l->b.size();
+    }
+
+    return;
+}
+
+void NeuralNetworx::writeConnections(std::ofstream& file) const
+{
+    // File header.
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    size_t count = 0;
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        Layer const& l = layers.at(i);
+        for (size_t j = 0; j < l.numNeurons; j++)
+        {
+            for (size_t k = 0; k < l.numNeuronsPrevLayer; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              l.w(j, k),
+                              count,
+                              i - 1,
+                              k + 1,
+                              i,
+                              j + 1);
+            }
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          l.b(j),
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+void NeuralNetworx::writeConnectionsAO(std::ofstream& file) const
+{
+    // File header.
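+    // (Identical header as in writeConnections(); only the ordering of the
+    // connection values below differs: per layer, all weights first, then
+    // all biases.)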
+    vector<string> title;
+    vector<string> colName;
+    vector<string> colInfo;
+    vector<size_t> colSize;
+    title.push_back("Neural network connection values (weights and biases).");
+    colSize.push_back(24);
+    colName.push_back("connection");
+    colInfo.push_back("Neural network connection value.");
+    colSize.push_back(1);
+    colName.push_back("t");
+    colInfo.push_back("Connection type (a = weight, b = bias).");
+    colSize.push_back(9);
+    colName.push_back("index");
+    colInfo.push_back("Index enumerating weights.");
+    colSize.push_back(5);
+    colName.push_back("l_s");
+    colInfo.push_back("Starting point layer (end point layer for biases).");
+    colSize.push_back(5);
+    colName.push_back("n_s");
+    colInfo.push_back("Starting point neuron in starting layer (end point "
+                      "neuron for biases).");
+    colSize.push_back(5);
+    colName.push_back("l_e");
+    colInfo.push_back("End point layer.");
+    colSize.push_back(5);
+    colName.push_back("n_e");
+    colInfo.push_back("End point neuron in end layer.");
+    appendLinesToFile(file,
+                      createFileHeader(title, colSize, colName, colInfo));
+
+    size_t count = 0;
+    for (size_t i = 1; i < layers.size(); ++i)
+    {
+        Layer const& l = layers.at(i);
+        for (size_t j = 0; j < l.numNeuronsPrevLayer; j++)
+        {
+            for (size_t k = 0; k < l.numNeurons; k++)
+            {
+                count++;
+                file << strpr("%24.16E a %9d %5d %5d %5d %5d\n",
+                              l.w(k, j),
+                              count,
+                              i - 1,
+                              j + 1,
+                              i,
+                              k + 1);
+            }
+        }
+        for (size_t j = 0; j < l.numNeurons; j++)
+        {
+            count++;
+            file << strpr("%24.16E b %9d %5d %5d\n",
+                          l.b(j),
+                          count,
+                          i,
+                          j + 1);
+        }
+    }
+
+    return;
+}
+
+map<string, double> NeuralNetworx::getNeuronProperties(size_t layer,
+                                                       size_t neuron) const
+{
+    map<string, double> properties;
+
+    properties["y"] = layers.at(layer).y(neuron);
+    if (layers.at(layer).numNeuronsPrevLayer > 0)
+    {
+        properties["x"] = layers.at(layer).x(neuron);
+        properties["dydx"] = layers.at(layer).dydx(neuron);
+        properties["d2ydx2"] = layers.at(layer).d2ydx2(neuron);
+    }
+
+    return properties;
+}
+
+vector<pair<size_t, size_t>> NeuralNetworx::getLayerLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (size_t i = 0; i < numLayers - 2; ++i)
+    {
+        limits.push_back(make_pair(offsetLayer.at(i),
+                                   offsetLayer.at(i + 1) - 1));
+    }
+    limits.push_back(make_pair(offsetLayer.at(numLayers - 2),
+                               numConnections - 1));
+
+    return limits;
+}
+
+vector<pair<size_t, size_t>> NeuralNetworx::getNeuronLimits() const
+{
+    vector<pair<size_t, size_t>> limits;
+
+    for (size_t i = 0; i < numLayers - 1; ++i)
+    {
+        size_t const n = layers.at(i + 1).numNeurons;
+        for (size_t j = 0; j < n - 1; ++j)
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(j),
+                                       offsetNeuron.at(i).at(j + 1) - 1));
+        }
+        if (i == numLayers - 2)
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(n - 1),
+                                       numConnections - 1));
+        }
+        else
+        {
+            limits.push_back(make_pair(offsetNeuron.at(i).at(n - 1),
+                                       offsetNeuron.at(i + 1).at(0) - 1));
+        }
+    }
+
+    return limits;
+}
+
+vector<string> NeuralNetworx::info() const
+{
+    vector<string> v;
+    Eigen::Index maxNeurons = 0;
+
+    v.push_back(strpr("Number of layers     : %6zu\n", numLayers));
+    v.push_back(strpr("Number of neurons    : %6zu\n", numNeurons));
+    v.push_back(strpr("Number of connections: %6zu\n", numConnections));
+    v.push_back(strpr("Number of weights    : %6zu\n", numWeights));
+    v.push_back(strpr("Number of biases     : %6zu\n", numBiases));
+    v.push_back(strpr("Architecture    "));
+    for (size_t i = 0; i < layers.size(); ++i)
+    {
+        maxNeurons = max(layers.at(i).y.size(),
+                         maxNeurons);
+    }
+    v.push_back("\n");
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
+
+    for (size_t i = 0; i < static_cast<size_t>(maxNeurons); ++i)
+    {
+
+vector<string> NeuralNetworx::info() const
+{
+    vector<string> v;
+    Eigen::Index maxNeurons = 0;
+
+    v.push_back(strpr("Number of layers     : %6zu\n", numLayers));
+    v.push_back(strpr("Number of neurons    : %6zu\n", numNeurons));
+    v.push_back(strpr("Number of connections: %6zu\n", numConnections));
+    v.push_back(strpr("Number of weights    : %6zu\n", numWeights));
+    v.push_back(strpr("Number of biases     : %6zu\n", numBiases));
+    v.push_back(strpr("Architecture    "));
+    for (size_t i = 0; i < layers.size(); ++i)
+    {
+        v.push_back(strpr(" %3zu", layers.at(i).numNeurons));
+        maxNeurons = max(layers.at(i).y.size(),
+                         maxNeurons);
+    }
+    v.push_back("\n");
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
+
+    for (size_t i = 0; i < static_cast<size_t>(maxNeurons); ++i)
+    {
+        v.push_back(strpr("%4d", i + 1));
+        string s = "";
+        for (size_t j = 0; j < layers.size(); ++j)
+        {
+            if (i < static_cast<size_t>(layers.at(j).y.size()))
+            {
+                if (j == 0)
+                {
+                    s += strpr(" %3s", "G");
+                }
+                else
+                {
+                    s += strpr(" %3s",
+                               stringFromActivation(layers.at(j).fa).c_str());
+                }
+            }
+            else
+            {
+                s += "    ";
+            }
+        }
+        v.push_back(s += "\n");
+    }
+
+    return v;
+}
+
+void NeuralNetworx::initialize(vector<size_t>     numNeuronsPerLayer,
+                               vector<Activation> activationPerLayer)
+{
+    if (numNeuronsPerLayer.size() < 2)
+    {
+        throw runtime_error(strpr("ERROR: Neural network must have at least "
+                                  "two layers (input = %zu).\n",
+                                  numNeuronsPerLayer.size()));
+    }
+    if (numNeuronsPerLayer.size() != activationPerLayer.size())
+    {
+        throw runtime_error("ERROR: Number of layers is inconsistent.\n");
+    }
+
+    // Input layer has no neurons from previous layer.
+    numNeuronsPerLayer.insert(numNeuronsPerLayer.begin(), 0);
+    numLayers = 0;
+    layers.clear();
+    for (size_t i = 0; i < activationPerLayer.size(); ++i)
+    {
+        if (numNeuronsPerLayer.at(i + 1) < 1)
+        {
+            throw runtime_error("ERROR: Layer needs at least one neuron.\n");
+        }
+        layers.push_back(Layer(numNeuronsPerLayer.at(i + 1),
+                               numNeuronsPerLayer.at(i),
+                               activationPerLayer.at(i)));
+        numLayers++;
+    }
+
+    // Compute number of connections, weights and biases.
+    numNeurons = 0;
+    numConnections = 0;
+    numWeights = 0;
+    numBiases = 0;
+    for (auto const& l : layers)
+    {
+        numNeurons += l.y.size();
+        numConnections += l.w.size() + l.b.size();
+        numWeights += l.w.size();
+        numBiases += l.b.size();
+    }
+
+    // Compute layer and neuron offsets for combined connections vector.
+    offsetLayer.clear();
+    offsetLayer.reserve(numLayers - 1);
+    offsetNeuron.clear();
+    offsetNeuron.resize(numLayers - 1);
+    size_t count = 0;
+    for (size_t i = 1; i < numLayers; ++i)
+    {
+        offsetLayer.push_back(count);
+        for (size_t j = 0; j < layers.at(i).numNeurons; ++j)
+        {
+            offsetNeuron.at(i - 1).push_back(count);
+            // Weights of neuron j.
+            count += layers.at(i).numNeuronsPrevLayer;
+            // Bias of neuron j.
+            count++;
+        }
+    }
+
+    return;
+}
+
+string nnp::stringFromActivation(NeuralNetworx::Activation a)
+{
+    string c = "";
+
+    if (a == NeuralNetworx::Activation::IDENTITY) c = "l";
+    else if (a == NeuralNetworx::Activation::TANH) c = "t";
+    else if (a == NeuralNetworx::Activation::LOGISTIC) c = "s";
+    else if (a == NeuralNetworx::Activation::SOFTPLUS) c = "p";
+    else if (a == NeuralNetworx::Activation::RELU) c = "r";
+    else if (a == NeuralNetworx::Activation::GAUSSIAN) c = "g";
+    else if (a == NeuralNetworx::Activation::COS) c = "c";
+    else if (a == NeuralNetworx::Activation::REVLOGISTIC) c = "S";
+    else if (a == NeuralNetworx::Activation::EXP) c = "e";
+    else if (a == NeuralNetworx::Activation::HARMONIC) c = "h";
+    else
+    {
+        throw runtime_error("ERROR: Unknown activation function.\n");
+    }
+
+    return c;
+}
+
+NeuralNetworx::Activation nnp::activationFromString(string c)
+{
+    NeuralNetworx::Activation a;
+
+    if (c == "l") a = NeuralNetworx::Activation::IDENTITY;
+    else if (c == "t") a = NeuralNetworx::Activation::TANH;
+    else if (c == "s") a = NeuralNetworx::Activation::LOGISTIC;
+    else if (c == "p") a = NeuralNetworx::Activation::SOFTPLUS;
+    else if (c == "r") a = NeuralNetworx::Activation::RELU;
+    else if (c == "g") a = NeuralNetworx::Activation::GAUSSIAN;
+    else if (c == "c") a = NeuralNetworx::Activation::COS;
+    else if (c == "S") a = NeuralNetworx::Activation::REVLOGISTIC;
+    else if (c == "e") a = NeuralNetworx::Activation::EXP;
+    else if (c == "h") a = NeuralNetworx::Activation::HARMONIC;
+    else
+    {
+        throw runtime_error("ERROR: Unknown activation function.\n");
+    }
+
+    return a;
+}
diff --git a/src/libnnp/NeuralNetworx.h b/src/libnnp/NeuralNetworx.h
new file mode 100644
index 000000000..bde12ec9f
--- /dev/null
+++ b/src/libnnp/NeuralNetworx.h
@@ -0,0 +1,380 @@
+// n2p2 - A neural network potential package
+// Copyright (C) 2018 Andreas Singraber (University of Vienna)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+#ifndef NEURALNETWORX_H
+#define NEURALNETWORX_H
+
+#include <cstddef> // std::size_t
+#include <fstream> // std::ofstream
+#include <map>     // std::map
+#include <string>  // std::string
+#include <utility> // std::pair
+#include <vector>  // std::vector
+#include <Eigen/Core>
+
+namespace nnp
+{
+
+/// This class implements a feed-forward neural network with Eigen.
+class NeuralNetworx
+{
+public:
+
+    /// List of available activation function types.
+    enum class Activation
+    {
+        /// @f$f_a(x) = x@f$
+        IDENTITY,
+        /// @f$f_a(x) = \tanh(x)@f$
+        TANH,
+        /// @f$f_a(x) = 1 / (1 + \mathrm{e}^{-x})@f$
+        LOGISTIC,
+        /// @f$f_a(x) = \ln (1 + \mathrm{e}^x)@f$
+        SOFTPLUS,
+        /// @f$f_a(x) = \max(0, x)@f$ (NOT recommended for HDNNPs!)
+        RELU,
+        /// @f$f_a(x) = \mathrm{e}^{-x^2 / 2}@f$
+        GAUSSIAN,
+        /// @f$f_a(x) = \cos (x)@f$
+        COS,
+        /// @f$f_a(x) = 1 - 1 / (1 + \mathrm{e}^{-x})@f$
+        REVLOGISTIC,
+        /// @f$f_a(x) = \mathrm{e}^{-x}@f$
+        EXP,
+        /// @f$f_a(x) = x^2@f$
+        HARMONIC
+    };
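+
+    // These activation types correspond one-to-one to the single-letter
+    // codes "l", "t", "s", "p", "r", "g", "c", "S", "e", "h" understood by
+    // activationFromString() and stringFromActivation() below.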
+
+    /// One layer of a feed-forward neural network.
+    struct Layer
+    {
+        /** Constructor for a single layer.
+         *
+         * @param[in] numNeurons Number of neurons in this layer.
+         * @param[in] numNeuronsPrevLayer Number of neurons in previous layer.
+         * @param[in] activation Activation function used in this layer.
+         */
+        Layer(std::size_t numNeurons,
+              std::size_t numNeuronsPrevLayer,
+              Activation  activation);
+        /** Propagate information through this layer.
+         *
+         * @param[in] yp Neuron values from previous layer.
+         */
+        void propagate(Eigen::VectorXd const& yp);
+        /** Initialize #dydyi in this layer.
+         */
+        void initializeDerivInput();
+        /** Propagate derivative information through this layer.
+         *
+         * @param[in] dypdyi Derivatives from previous layer.
+         */
+        void propagateDerivInput(Eigen::MatrixXd const& dypdyi);
+
+        /// Number of neurons in this layer.
+        std::size_t     numNeurons;
+        /// Number of neurons in previous layer.
+        std::size_t     numNeuronsPrevLayer;
+        /// Common activation function for all neurons in this layer.
+        Activation      fa;
+        /// Weights from previous to this layer.
+        Eigen::MatrixXd w;
+        /// Biases assigned to neurons in this layer.
+        Eigen::VectorXd b;
+        /// Neuron weighted sum before activation function.
+        Eigen::VectorXd x;
+        /// Neuron values (activation function applied).
+        Eigen::VectorXd y;
+        /// Neuron value derivative w.r.t. activation function argument.
+        Eigen::VectorXd dydx;
+        /// Neuron value second derivative w.r.t. activation function argument.
+        Eigen::VectorXd d2ydx2;
+        /// Derivative of neurons w.r.t. neurons in input layer.
+        Eigen::MatrixXd dydyi;
+    };
+
+    /** Neural network class constructor.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationPerLayer Array with activation function type per
+     *                               layer (note: input layer activation
+     *                               function is mandatory although it is
+     *                               never used).
+     */
+    NeuralNetworx(std::vector<std::size_t> numNeuronsPerLayer,
+                  std::vector<Activation>  activationPerLayer);
+    /** Neural network class constructor with strings for activation types.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationStringPerLayer Array with activation function
+     *                                     type per layer (note: input layer
+     *                                     activation function is mandatory
+     *                                     although it is never used).
+     */
+    NeuralNetworx(std::vector<std::size_t> numNeuronsPerLayer,
+                  std::vector<std::string> activationStringPerLayer);
+    /** Set input layer neuron values.
+     *
+     * @param[in] input Vector with input neuron values.
+     */
+    void setInput(std::vector<double> const& input);
+    /** Propagate information through all layers (input already set).
+     *
+     * @param[in] deriv Propagate also derivative of output w.r.t. input
+     *                  neurons.
+     */
+    void propagate(bool deriv = false);
+    /** Propagate information through all layers.
+     *
+     * @param[in] input Vector with input neuron values.
+     * @param[in] deriv Propagate also derivative of output w.r.t. input
+     *                  neurons.
+     */
+    void propagate(std::vector<double> const& input,
+                   bool                       deriv = false);
+    /** Get output layer neuron values.
+     *
+     * @param[out] output Vector with output layer neuron values.
+     */
+    void getOutput(std::vector<double>& output) const;
+    /** Get derivative of output layer neurons w.r.t. input neurons.
+     *
+     * @param[out] derivInput Derivatives of output layer neurons with
+     *                        respect to input layer neurons.
+     */
+    void getDerivInput(std::vector<std::vector<double>>& derivInput) const;
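+    // Usage sketch (hypothetical values): construct a 2-3-1 network and run
+    // one forward pass including derivatives w.r.t. the input neurons:
+    //     NeuralNetworx nn({2, 3, 1},
+    //                      std::vector<std::string>{"l", "t", "l"});
+    //     nn.propagate(std::vector<double>{0.1, -0.2}, true);
+    //     std::vector<double> output;
+    //     nn.getOutput(output);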
+    /** Set neural network weights and biases.
+     *
+     * @param[in] connections One-dimensional vector with neural network
+     *                        connections in the following order:
+     * @f[
+     *   \underbrace{
+     *     \overbrace{
+     *       a^{01}_{00}, \ldots, a^{01}_{n_00}, b^{1}_{0}
+     *     }^{\text{Neuron}^{1}_{0}}
+     *     \overbrace{
+     *       a^{01}_{01}, \ldots, a^{01}_{n_01}, b^{1}_{1}
+     *     }^{\text{Neuron}^{1}_{1}}
+     *     \ldots,
+     *     \overbrace{
+     *       a^{01}_{0n_1}, \ldots, a^{01}_{n_0n_1}, b^{1}_{n_1}
+     *     }^{\text{Neuron}^{1}_{n_1}}
+     *   }_{\text{Layer } 0 \rightarrow 1},
+     *   \underbrace{
+     *     a^{12}_{00}, \ldots, b^{2}_{n_2}
+     *   }_{\text{Layer } 1 \rightarrow 2},
+     *   \ldots,
+     *   \underbrace{
+     *     a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     *   }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron @f$j@f$ in
+     * layer @f$i-1@f$ to neuron @f$k@f$ in layer @f$i@f$ and @f$b^{i}_{k}@f$
+     * is the bias assigned to neuron @f$k@f$ in layer @f$i@f$.
+     *
+     * This ordering scheme is used internally to store weights in memory.
+     * Because all weights and biases connected to a neuron are aligned in a
+     * contiguous block this memory layout simplifies the implementation of
+     * node-decoupled Kalman filter training (NDEKF). However, for backward
+     * compatibility reasons this layout is NOT used for storing weights on
+     * disk (see setConnectionsAO()).
+     */
+    void setConnections(std::vector<double> const& connections);
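+    // Ordering example (hypothetical 2-2-1 network): setConnections()
+    // expects
+    //     { a01_00, a01_10, b1_0,   a01_01, a01_11, b1_1,
+    //       a12_00, a12_10, b2_0 },
+    // i.e. each neuron's incoming weights immediately followed by its own
+    // bias, neuron by neuron, layer by layer.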
+    /** Set neural network weights and biases (alternative ordering).
+     *
+     * @param[in] connections One-dimensional vector with neural network
+     *                        connections in the following order:
+     * @f[
+     *   \underbrace{
+     *     \overbrace{
+     *       a^{01}_{00}, \ldots, a^{01}_{0n_1},
+     *       a^{01}_{10}, \ldots, a^{01}_{1n_1},
+     *       \ldots,
+     *       a^{01}_{n_00}, \ldots, a^{01}_{n_0n_1},
+     *     }^{\text{Weights}}
+     *     \overbrace{
+     *       b^{1}_{0}, \ldots, b^{1}_{n_1}
+     *     }^{\text{Biases}}
+     *   }_{\text{Layer } 0 \rightarrow 1},
+     *   \underbrace{
+     *     a^{12}_{00}, \ldots, b^{2}_{n_2}
+     *   }_{\text{Layer } 1 \rightarrow 2},
+     *   \ldots,
+     *   \underbrace{
+     *     a^{p-1,p}_{00}, \ldots, b^{p}_{n_p}
+     *   }_{\text{Layer } p-1 \rightarrow p}
+     * @f]
+     * where @f$a^{i-1, i}_{jk}@f$ is the weight connecting neuron @f$j@f$ in
+     * layer @f$i-1@f$ to neuron @f$k@f$ in layer @f$i@f$ and @f$b^{i}_{k}@f$
+     * is the bias assigned to neuron @f$k@f$ in layer @f$i@f$.
+     *
+     * This memory layout is used when storing weights on disk.
+     */
+    void setConnectionsAO(std::vector<double> const& connections);
+    /** Get neural network weights and biases.
+     *
+     * @param[out] connections One-dimensional vector with neural network
+     *                         connections (same order as described in
+     *                         #setConnections()).
+     */
+    void getConnections(std::vector<double>& connections) const;
+    /** Get neural network weights and biases (alternative ordering).
+     *
+     * @param[out] connections One-dimensional vector with neural network
+     *                         connections (same order as described in
+     *                         #setConnectionsAO()).
+     */
+    void getConnectionsAO(std::vector<double>& connections) const;
+    /** Write connections to file.
+     *
+     * __CAUTION__: For compatibility reasons this format is NOT used for
+     * storing NN weights to disk!
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void writeConnections(std::ofstream& file) const;
+    /** Write connections to file (alternative ordering).
+     *
+     * This NN weights ordering layout is used when writing weights to disk.
+     *
+     * @param[in,out] file File stream to write to.
+     */
+    void writeConnectionsAO(std::ofstream& file) const;
+    /** Get properties for a single neuron.
+     *
+     * @param[in] layer Index of layer (starting from 0 = input layer).
+     * @param[in] neuron Index of neuron (starting from 0 = first neuron in
+     *                   layer).
+     *
+     * @return Map from neuron's property (as string) to value.
+     */
+    std::map<
+    std::string, double>       getNeuronProperties(std::size_t layer,
+                                                   std::size_t neuron) const;
+    /** Get layer limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with layer limits as pairs (begin, end),
+     *         (numLayers - 1) entries.
+     */
+    std::vector<std::pair<
+    std::size_t, std::size_t>> getLayerLimits() const;
+    /** Get neuron limits in combined weight vector (for Kalman filter
+     * decoupling setup).
+     *
+     * @return Vector with neuron limits as pairs (begin, end),
+     *         (numNeurons - numNeurons[input layer]) entries.
+     */
+    std::vector<std::pair<
+    std::size_t, std::size_t>> getNeuronLimits() const;
+    /** Printable strings with neural network architecture information.
+     *
+     * @return Vector of lines for printing.
+     */
+    std::vector<std::string>   info() const;
+    /// Getter for #numLayers
+    std::size_t getNumLayers() const;
+    /// Getter for #numNeurons
+    std::size_t getNumNeurons() const;
+    /// Getter for #numConnections
+    std::size_t getNumConnections() const;
+    /// Getter for #numWeights
+    std::size_t getNumWeights() const;
+    /// Getter for #numBiases
+    std::size_t getNumBiases() const;
+
+private:
+
+    /** Class initialization.
+     *
+     * @param[in] numNeuronsPerLayer Array with number of neurons per layer.
+     * @param[in] activationPerLayer Array with activation function type per
+     *                               layer (note: input layer activation
+     *                               function is mandatory although it is
+     *                               never used).
+     *
+     * Avoids duplicate code due to overloaded constructor.
+     */
+    void initialize(std::vector<std::size_t> numNeuronsPerLayer,
+                    std::vector<Activation>  activationPerLayer);
+
+    /// Number of neural network layers.
+    std::size_t numLayers;
+    /// Number of neurons.
+    std::size_t numNeurons;
+    /// Number of neural network connections (weights + biases).
+    std::size_t numConnections;
+    /// Number of neural network weights.
+    std::size_t numWeights;
+    /// Number of neural network biases.
+    std::size_t numBiases;
+    /// Offset of layers in combined connection vector.
+    std::vector<
+    std::size_t> offsetLayer;
+    /// Offset of neurons in combined connection vector.
+    std::vector<
+    std::vector<
+    std::size_t>> offsetNeuron;
+    /// Vector of neural network layers.
+    std::vector<Layer> layers;
+
+};
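+
+// Note: getLayerLimits() and getNeuronLimits() return exactly the
+// (begin, end) index pairs that KalmanFilter::setupDecoupling() consumes for
+// per-layer (DT_LAYER) and per-node (DT_NODE/NDEKF) decoupling, see
+// Training::setupTraining().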
+
+/** Convert string to activation function.
+ *
+ * @param[in] c String representing activation function.
+ *
+ * @return Activation corresponding to string.
+ */
+NeuralNetworx::Activation activationFromString(std::string c);
+/** Convert activation function to string.
+ *
+ * @param[in] a Input Activation type.
+ *
+ * @return String representing activation function.
+ */
+std::string stringFromActivation(NeuralNetworx::Activation a);
+
+//////////////////////////////////
+// Inlined function definitions //
+//////////////////////////////////
+
+inline
+std::size_t NeuralNetworx::getNumLayers() const {return numLayers;}
+
+inline
+std::size_t NeuralNetworx::getNumNeurons() const {return numNeurons;}
+
+inline
+std::size_t NeuralNetworx::getNumConnections() const {return numConnections;}
+
+inline
+std::size_t NeuralNetworx::getNumWeights() const {return numWeights;}
+
+inline
+std::size_t NeuralNetworx::getNumBiases() const {return numBiases;}
+
+}
+
+#endif
diff --git a/src/libnnp/Settings.cpp b/src/libnnp/Settings.cpp
index 68c0638d8..7f6a2c2ca 100644
--- a/src/libnnp/Settings.cpp
+++ b/src/libnnp/Settings.cpp
@@ -73,6 +73,7 @@ map<string, string> const createKnownKeywordsMap()
     m["jacobian_mode"            ] = "";
     m["update_strategy"          ] = "";
     m["selection_mode"           ] = "";
+    m["decoupling_type"          ] = "";
     m["task_batch_size_energy"   ] = "";
     m["task_batch_size_force"    ] = "";
     m["gradient_type"            ] = "";
diff --git a/src/libnnp/makefile b/src/libnnp/makefile
index ba03f653e..c6354e077 100644
--- a/src/libnnp/makefile
+++ b/src/libnnp/makefile
@@ -45,7 +45,7 @@ AR=$(PROJECT_AR)
 ARFLAGS=$(PROJECT_ARFLAGS)
 
 # Extra include paths for compiling.
-INCLUDES=-I./
+INCLUDES=-I./ -I${PROJECT_EIGEN}
 
 ###############################################################################
diff --git a/src/libnnptrain/KalmanFilter.cpp b/src/libnnptrain/KalmanFilter.cpp
index ab55a8419..5d02b44c2 100644
--- a/src/libnnptrain/KalmanFilter.cpp
+++ b/src/libnnptrain/KalmanFilter.cpp
@@ -16,9 +16,11 @@
 
 #include "KalmanFilter.h"
 #include "utility.h"
+#include "mpi-extra.h"
 #include <cmath>
 #include <cstddef>
 #include <Eigen/LU>
+#include <map>     // std::map
 
 using namespace Eigen;
 using namespace std;
@@ -27,8 +29,11 @@ using namespace nnp;
 KalmanFilter::KalmanFilter(size_t const sizeState,
                            KalmanType const type) :
     Updater(sizeState),
+    myRank         (0   ),
+    numProcs       (0   ),
     sizeObservation(0   ),
     numUpdates     (0   ),
+    sizeP          (0   ),
     epsilon        (0.0 ),
     q              (0.0 ),
     q0             (0.0 ),
@@ -62,17 +67,130 @@ KalmanFilter::KalmanFilter(size_t const sizeState,
     w = new Map<VectorXd>(0, sizeState);
     xi = new Map<VectorXd const>(0, sizeObservation);
     H = new Map<MatrixXd const>(0, sizeState, sizeObservation);
-    P.resize(sizeState, sizeState);
-    P.setIdentity();
-    // Prevent problems with unallocated K when log starts.
-    K.resize(sizeState, sizeObservation);
-    K.setZero();
 }
 
 KalmanFilter::~KalmanFilter()
 {
 }
 
+void KalmanFilter::setupMPI(MPI_Comm* communicator)
+{
+    MPI_Comm_dup(*communicator, &comm);
+    MPI_Comm_rank(comm, &myRank);
+    MPI_Comm_size(comm, &numProcs);
+
+    return;
+}
+
+void KalmanFilter::setupDecoupling(vector<pair<size_t, size_t>> groupLimits)
+{
+    this->groupLimits = groupLimits;
+
+    // Check consistency of decoupling group limits.
+    if (groupLimits.at(0).first != 0)
+    {
+        auto const& l = groupLimits.at(0);
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, first group must start with index "
+                                  "0 (is %zu).\n",
+                                  l.first));
+    }
+    for (size_t i = 0; i < groupLimits.size(); ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        if (l.first > l.second)
+        {
+            throw runtime_error(
+                      strpr("ERROR: Inconsistent decoupling group limits, "
+                            "group %zu: start %zu > end %zu.\n",
+                            i, l.first, l.second));
+        }
+    }
+    for (size_t i = 0; i < groupLimits.size() - 1; ++i)
+    {
+        auto const& l = groupLimits.at(i);
+        auto const& r = groupLimits.at(i + 1);
+        if (l.second + 1 != r.first)
+        {
+            throw runtime_error(
+                      strpr("ERROR: Inconsistent decoupling group limits, "
+                            "group %zu end %zu + 1 != group %zu start %zu.\n",
+                            i, l.second, i + 1, r.first));
+        }
+    }
+    if (groupLimits.back().second != sizeState - 1)
+    {
+        auto const& l = groupLimits.back();
+        throw runtime_error(strpr("ERROR: Inconsistent decoupling group "
+                                  "limits, last group must end with index "
+                                  "%zu (is %zu).\n",
+                                  sizeState - 1,
+                                  l.second));
+    }
+
+    // Distribute groups to MPI procs.
+    numGroupsPerProc.resize(numProcs, 0);
+    groupOffsetPerProc.resize(numProcs, 0);
+    int quotient = groupLimits.size() / numProcs;
+    int remainder = groupLimits.size() % numProcs;
+    int groupSum = 0;
+    for (int i = 0; i < numProcs; ++i)
+    {
+        numGroupsPerProc.at(i) = quotient;
+        if (remainder > 0 && i < remainder)
+        {
+            numGroupsPerProc.at(i)++;
+        }
+        groupOffsetPerProc.at(i) = groupSum;
+        groupSum += numGroupsPerProc.at(i);
+    }
+
+    // Compute state vector segment length and offset for each processor.
+    for (int i = 0; i < numProcs - 1; ++i)
+    {
+        sizeStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i + 1)).first -
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+        offsetStatePerProc.push_back(
+            groupLimits.at(groupOffsetPerProc.at(i)).first);
+    }
+    sizeStatePerProc.push_back(
+        groupLimits.back().second + 1 -
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+    offsetStatePerProc.push_back(
+        groupLimits.at(groupOffsetPerProc.at(numProcs - 1)).first);
+
+    // Store group index and segment length for this processor.
+    for (int i = 0; i < numGroupsPerProc.at(myRank); ++i)
+    {
+        size_t index = groupOffsetPerProc.at(myRank) + i;
+        size_t size = groupLimits.at(index).second
+                    - groupLimits.at(index).first + 1;
+        myGroups.push_back(make_pair(index, size));
+    }
+
+    // Allocate per-processor matrices for all groups.
+    for (auto g : myGroups)
+    {
+        P.push_back(Eigen::MatrixXd(g.second, g.second));
+        P.back().setIdentity();
+
+        X.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+
+        // Prevent problems with unallocated K when log starts.
+        K.push_back(Eigen::MatrixXd(g.second, sizeObservation));
+        K.back().setZero();
+
+        sizeP += g.second * g.second;
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &sizeP, 1, MPI_SIZE_T, MPI_SUM, comm);
+
+    return;
+}
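+
+// Distribution sketch (hypothetical numbers): 10 decoupling groups on 4 MPI
+// tasks yield numGroupsPerProc = {3, 3, 2, 2}; every task allocates only the
+// covariance blocks P of its own groups, so the total memory sizeP is the
+// sum of squared group sizes instead of sizeState * sizeState.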
+
 void KalmanFilter::setSizeObservation(size_t const size)
 {
     sizeObservation = size;
@@ -125,15 +243,32 @@ void KalmanFilter::update()
 
 void KalmanFilter::update(size_t const sizeObservation)
 {
-    X.resize(sizeState, sizeObservation);
+    MatrixXd A(sizeObservation, sizeObservation);
+    A.setZero();
 
-    // Calculate temporary result.
-    // X = P . H
-    X = P.selfadjointView<Lower>() * (*H);
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+
+        x.resize(n, sizeObservation);
+
+        // Calculate temporary result.
+        // X = P . H
+        x = p.selfadjointView<Lower>() * H->block(s, 0, n, sizeObservation);
+
+        // Calculate scaling matrix.
+        // A = H^T . X
+        // For decoupling, accumulate the scaling matrix locally here.
+        A += H->block(s, 0, n, sizeObservation).transpose() * x;
+    }
 
-    // Calculate scaling matrix.
-    // A = H^T . X
-    MatrixXd A = H->transpose() * X;
+    // Sum up scaling matrix globally.
+    MPI_Allreduce(MPI_IN_PLACE, A.data(), A.size(), MPI_DOUBLE, MPI_SUM, comm);
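+
+    // After the reduction every task holds the same scaling matrix
+    // A = sum_g H_g^T P_g H_g; compared to the GEKF, the full covariance
+    // matrix is thus approximated by its block-diagonal (per-group) part.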
 
     // Increase learning rate.
     // eta(n) = eta(0) * exp(n * tau)
@@ -150,38 +285,52 @@ void KalmanFilter::update(size_t const sizeObservation)
         A.diagonal() += VectorXd::Constant(sizeObservation, lambda);
     }
 
-    // Calculate Kalman gain matrix.
-    // K = X . A^-1
-    K.resize(sizeState, sizeObservation);
-    K = X * A.inverse();
-
-    // Update error covariance matrix.
-    // P = P - K . X^T
-    P.noalias() -= K * X.transpose();
-
-    // Apply forgetting factor.
-    if (type == KT_FADINGMEMORY)
+    for (size_t i = 0; i < myGroups.size(); ++i)
     {
-        P *= 1.0 / lambda;
+        size_t const& j = myGroups.at(i).first;    // group index
+        size_t const& n = myGroups.at(i).second;   // group size
+        size_t const& s = groupLimits.at(j).first; // group weight start index
+
+        auto& x = X.at(i);
+        auto& p = P.at(i);
+        auto& k = K.at(i);
+
+        // Calculate Kalman gain matrix.
+        // K = X . A^-1
+        k.resize(n, sizeObservation);
+        k = x * A.inverse();
+
+        // Update error covariance matrix.
+        // P = P - K . X^T
+        p.noalias() -= k * x.transpose();
+
+        // Apply forgetting factor.
+        if (type == KT_FADINGMEMORY)
+        {
+            p *= 1.0 / lambda;
+        }
+        // Add process noise.
+        // P = P + Q
+        p.diagonal() += VectorXd::Constant(n, q);
+
+        // Update state vector.
+        // w = w + K . xi
+        w->segment(s, n) += k * (*xi);
     }
-    // Add process noise.
-    // P = P + Q
-    P.diagonal() += VectorXd::Constant(sizeState, q);
-
-    // Update state vector.
-    // w = w + K . xi
-    (*w) += K * (*xi);
 
     // Anneal process noise.
     // q(n) = q(0) * exp(-n * tau)
     if (q > qmin) q *= exp(-qtau);
 
     // Update forgetting factor.
     if (type == KT_FADINGMEMORY)
     {
         lambda = nu * lambda + 1.0 - nu;
         gamma = 1.0 / (1.0 + lambda / gamma);
     }
+
+    // Communicate new weight segments.
+    MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, w->data(),
+                   sizeStatePerProc.data(), offsetStatePerProc.data(),
+                   MPI_DOUBLE, comm);
 
     numUpdates++;
 
@@ -206,7 +355,10 @@ void KalmanFilter::setParametersStandard(double const epsilon,
     q   = q0;
     eta = eta0;
 
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
 
     return;
 }
@@ -226,7 +378,10 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon,
     this->nu   = nu;
 
     q = q0;
-    P /= epsilon;
+    for (auto& p : P)
+    {
+        p /= epsilon;
+    }
     gamma = 1.0;
 
     return;
@@ -235,12 +390,30 @@ void KalmanFilter::setParametersFadingMemory(double const epsilon,
 
 string KalmanFilter::status(size_t epoch) const
 {
-    double Pasym    = 0.5 * (P - P.transpose()).array().abs().mean();
-    double Pdiag    = P.diagonal().array().abs().sum();
-    double Poffdiag = (P.array().abs().sum() - Pdiag)
-                    / (sizeState * (sizeState - 1));
+    double Pasym    = 0.0;
+    double Pdiag    = 0.0;
+    double Poffdiag = 0.0;
+    double Kmean    = 0.0;
+
+    for (size_t i = 0; i < myGroups.size(); ++i)
+    {
+        auto const& p = P.at(i);
+        auto const& k = K.at(i);
+        Pasym += 0.5 * (p - p.transpose()).array().abs().sum();
+        double pdiag = p.diagonal().array().abs().sum();
+        Pdiag += pdiag;
+        Poffdiag += p.array().abs().sum() - pdiag;
+        Kmean += k.array().abs().sum();
+    }
+
+    MPI_Allreduce(MPI_IN_PLACE, &Pasym   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Pdiag   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Poffdiag, 1, MPI_DOUBLE, MPI_SUM, comm);
+    MPI_Allreduce(MPI_IN_PLACE, &Kmean   , 1, MPI_DOUBLE, MPI_SUM, comm);
+    Pasym    /= (sizeP - sizeState);
     Pdiag    /= sizeState;
-    double Kmean    = K.array().abs().mean();
+    Poffdiag /= (sizeP - sizeState);
+    Kmean    /= (sizeState * sizeObservation);
 
     string s = strpr("%10zu %10zu %16.8E %16.8E %16.8E %16.8E %16.8E",
                      epoch, numUpdates, Pdiag, Poffdiag, Pasym, Kmean, q);
@@ -314,11 +487,24 @@ vector<string> KalmanFilter::info() const
 {
     vector<string> v;
 
+    v.push_back("Extended Kalman Filter (EKF) optimizer:\n");
+    v.push_back(strpr("sizeState       = %zu\n", sizeState));
+    v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
+    v.push_back(strpr("myRank          = %d\n", myRank));
+    v.push_back(strpr("numProcs        = %d\n", numProcs));
+    v.push_back(strpr("OpenMP threads used                     : %d\n",
+                      nbThreads()));
+    v.push_back(strpr("Number of decoupling groups             : %zu\n",
+                      groupLimits.size()));
+    v.push_back(strpr("Number of decoupling groups of proc %3d : %zu\n",
+                      myRank, myGroups.size()));
+    v.push_back(strpr("P matrix size ratio (compared to GEKF)  : %6.2f %%\n",
+                      100.0 * sizeP / (sizeState * sizeState)));
+    v.push_back("-----------------------------------------"
+                "--------------------------------------\n");
     if (type == KT_STANDARD)
     {
         v.push_back(strpr("KalmanType::KT_STANDARD (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon = %12.4E\n", epsilon));
         v.push_back(strpr("q0      = %12.4E\n", q0     ));
         v.push_back(strpr("qtau    = %12.4E\n", qtau   ));
@@ -330,8 +516,6 @@ vector<string> KalmanFilter::info() const
     else if (type == KT_FADINGMEMORY)
     {
         v.push_back(strpr("KalmanType::KT_FADINGMEMORY (%d)\n", type));
-        v.push_back(strpr("sizeState       = %zu\n", sizeState));
-        v.push_back(strpr("sizeObservation = %zu\n", sizeObservation));
         v.push_back(strpr("epsilon = %12.4E\n", epsilon));
        v.push_back(strpr("q0      = %12.4E\n", q0     ));
         v.push_back(strpr("qtau    = %12.4E\n", qtau   ));
@@ -339,7 +523,17 @@ vector<string> KalmanFilter::info() const
         v.push_back(strpr("lambda  = %12.4E\n", lambda));
         v.push_back(strpr("nu      = %12.4E\n", nu    ));
     }
-    v.push_back(strpr("OpenMP threads used: %d\n", nbThreads()));
+    //for (size_t i = 0; i < groupLimits.size(); ++i)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n",
+    //                      i,
+    //                      groupLimits.at(i).second
+    //                      - groupLimits.at(i).first + 1));
+    //}
+    //for (auto g : myGroups)
+    //{
+    //    v.push_back(strpr(" - group %5zu size: %zu\n", g.first, g.second));
+    //}
 
     return v;
 }
diff --git a/src/libnnptrain/KalmanFilter.h b/src/libnnptrain/KalmanFilter.h
index 356cee949..0d409ac1c 100644
--- a/src/libnnptrain/KalmanFilter.h
+++ b/src/libnnptrain/KalmanFilter.h
@@ -22,6 +22,7 @@
 #include <cstddef>
 #include <string>
 #include <vector>
+#include <mpi.h>
 #include <Eigen/Core>
 
 namespace nnp
@@ -50,6 +51,19 @@ class KalmanFilter : public Updater
     /** Destructor.
     */
    virtual ~KalmanFilter();
+    /** Basic MPI setup.
+     *
+     * @param[in] communicator Input communicator of calling program.
+     */
+    void setupMPI(MPI_Comm* communicator);
+    /** Set up Kalman filter decoupling.
+     *
+     * @param[in] groupLimits Vector containing starting and ending indices
+     *                        of each decoupling group.
+     */
+    void setupDecoupling(
+             std::vector<std::pair<std::size_t, std::size_t>> groupLimits);
     /** Set observation vector size.
      *
      * @param[in] size Size of the observation vector.
@@ -194,10 +208,16 @@ class KalmanFilter : public Updater
 private:
     /// Kalman filter type.
     KalmanType type;
+    /// My process ID.
+    int myRank;
+    /// Total number of MPI processors.
+    int numProcs;
     /// Size of observation (measurement) vector.
     std::size_t sizeObservation;
     /// Total number of updates performed.
     std::size_t numUpdates;
+    /// Number of covariance matrix entries in memory (sum over all groups).
+    std::size_t sizeP;
     /// Error covariance initialization parameter @f$\epsilon@f$.
     double epsilon;
     /// Process noise @f$q@f$.
@@ -222,18 +242,34 @@ class KalmanFilter : public Updater
     double nu;
     /// Forgetting gain factor gamma for fading memory Kalman filter.
     double gamma;
+    /// Size of state vector segment handled by each processor.
+    std::vector<int> sizeStatePerProc;
+    /// Offset of state vector segment handled by each processor.
+    std::vector<int> offsetStatePerProc;
+    /// List of (group index, group size) pairs of this processor.
+    std::vector<std::pair<
+    std::size_t, std::size_t>> myGroups;
+    /// Number of groups for each processor.
+    std::vector<int> numGroupsPerProc;
+    /// Offset in terms of groups for each processor.
+    std::vector<int> groupOffsetPerProc;
+    /// Decoupling group limits, i.e. starting and ending indices.
+    std::vector<std::pair<
+    std::size_t, std::size_t>> groupLimits;
     /// State vector.
     Eigen::Map<Eigen::VectorXd>* w;
     /// Error vector.
     Eigen::Map<Eigen::VectorXd const>* xi;
     /// Derivative matrix.
     Eigen::Map<Eigen::MatrixXd const>* H;
-    /// Error covariance matrix.
-    Eigen::MatrixXd P;
+    /// Error covariance matrices for all groups.
+    std::vector<Eigen::MatrixXd> P;
     /// Kalman gain matrix.
-    Eigen::MatrixXd K;
+    std::vector<Eigen::MatrixXd> K;
     /// Intermediate result X = P . H.
-    Eigen::MatrixXd X;
+    std::vector<Eigen::MatrixXd> X;
+    /// Global MPI communicator.
+    MPI_Comm comm;
 };
 
 //////////////////////////////////
diff --git a/src/libnnptrain/Training.cpp b/src/libnnptrain/Training.cpp
index ba293c7d0..3b334d72d 100644
--- a/src/libnnptrain/Training.cpp
+++ b/src/libnnptrain/Training.cpp
@@ -40,6 +40,7 @@ Training::Training() : Dataset(),
                        jacobianMode   (JM_SUM     ),
                        updateStrategy (US_COMBINED),
                        selectionMode  (SM_RANDOM  ),
+                       decouplingType (DT_GLOBAL  ),
                        hasUpdaters    (false      ),
                        hasStructures  (false      ),
                        useForces      (false      ),
@@ -300,7 +301,13 @@ void Training::initializeWeights()
             w.at(i).at(j) = minWeights + gsl_rng_uniform(rngGlobal)
                           * (maxWeights - minWeights);
         }
-        elements.at(i).neuralNetwork->setConnections(&(w.at(i).front()));
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+        // To be consistent this should actually be the non-AO version,
+        // but this way results stay comparable.
+        elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#else
+        elements.at(i).neuralNetwork->setConnectionsAO(&(w.at(i).front()));
+#endif
     }
     if (settings.keywordExists("nguyen_widrow_weights_short"))
     {
@@ -330,7 +337,7 @@ void Training::initializeWeights()
              it != elements.end(); ++it)
         {
             it->neuralNetwork->
-                modifyConnections(NeuralNetwork::MS_GLOROTBENGIO);
+                modifyConnections(NeuralNetwork::MS_GLOROTBENGIO_UNIFORM);
             //it->neuralNetwork->
             //    modifyConnections(NeuralNetwork::MS_ZEROOUTPUTWEIGHTS);
             it->neuralNetwork->
@@ -991,6 +998,44 @@ void Training::setupTraining()
     {
         kalmanType = (KalmanFilter::KalmanType)
                      atoi(settings["kalman_type"].c_str());
+        decouplingType = (DecouplingType)
+                         atoi(settings["decoupling_type"].c_str());
+        if (decouplingType == DT_GLOBAL)
+        {
+            log << strpr("No decoupling (GEKF) selected: DT_GLOBAL (%d).\n",
+                         decouplingType);
+        }
+        else if (decouplingType == DT_ELEMENT)
+        {
+            log << strpr("Per-element decoupling (ED-GEKF) selected: "
+                         "DT_ELEMENT (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_LAYER)
+        {
+            log << strpr("Per-layer decoupling selected: "
+                         "DT_LAYER (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_NODE)
+        {
+            log << strpr("Per-node decoupling (NDEKF) selected: "
+                         "DT_NODE (%d).\n", decouplingType);
+        }
+        else if (decouplingType == DT_FULL)
+        {
+            log << strpr("Full (per-weight) decoupling selected: "
+                         "DT_FULL (%d).\n", decouplingType);
+        }
+        else
+        {
+            throw runtime_error("ERROR: Unknown Kalman filter decoupling "
+                                "type.\n");
+        }
+        if (decouplingType != DT_GLOBAL && updateStrategy != US_COMBINED)
+        {
+            throw runtime_error(strpr("ERROR: Kalman filter decoupling works "
+                                      "only in conjunction with "
+                                      "\"update_strategy %d\".\n",
+                                      US_COMBINED));
+        }
     }
 
     for (size_t i = 0; i < numUpdaters; ++i)
@@ -1008,6 +1053,81 @@ void Training::setupTraining()
             updaters.push_back(
                 (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i),
                                            kalmanType));
+            KalmanFilter* u = dynamic_cast<KalmanFilter*>(updaters.back());
+            if (parallelMode == PM_TRAIN_ALL)
+            {
+                u->setupMPI(&comm);
+            }
+            else
+            {
+                MPI_Group groupWorld;
+                MPI_Comm_group(comm, &groupWorld);
+                int const size0 = 1;
+                int const rank0[1] = {0};
+                MPI_Group groupRank0;
+                MPI_Group_incl(groupWorld, size0, rank0, &groupRank0);
+                MPI_Comm commRank0;
+                MPI_Comm_create_group(comm, groupRank0, 0, &commRank0);
+                u->setupMPI(&commRank0);
+            }
+            vector<pair<size_t, size_t>> limits;
+            if (decouplingType == DT_GLOBAL)
+            {
+                limits.push_back(
+                    make_pair(0, numWeightsPerUpdater.at(i) - 1));
+            }
+            else if (decouplingType == DT_ELEMENT)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    limits.push_back(make_pair(
+                        weightsOffset.at(j),
+                        weightsOffset.at(j) + elements.at(j).
+                        neuralNetwork->getNumConnections() - 1));
+                }
+            }
+            else if (decouplingType == DT_LAYER)
+            {
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getLayerLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+            }
+            else if (decouplingType == DT_NODE)
+            {
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
+                for (size_t j = 0; j < numElements; ++j)
+                {
+                    for (auto l : elements.at(j).neuralNetwork
+                                  ->getNeuronLimits())
+                    {
+                        limits.push_back(make_pair(
+                            weightsOffset.at(j) + l.first,
+                            weightsOffset.at(j) + l.second));
+                    }
+                }
+#else
+                throw runtime_error("ERROR: Node-decoupled Kalman filter "
+                                    "(NDEKF) training not possible with "
+                                    "alternative (old) weight memory "
+                                    "layout, recompile without "
+                                    "-DALTERNATIVE_WEIGHT_ORDERING.\n");
+#endif
+            }
+            else if (decouplingType == DT_FULL)
+            {
+                for (size_t j = 0; j < numWeightsPerUpdater.at(i); ++j)
+                {
+                    limits.push_back(make_pair(j, j));
+                }
+            }
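+            // Group layout example (hypothetical two-element system with
+            // identical networks): DT_ELEMENT yields two groups, one per
+            // element block of the combined weight vector; DT_FULL yields
+            // one group per weight, i.e. a purely diagonal covariance.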
+            u->setupDecoupling(limits);
         }
         updaters.back()->setState(&(weights.at(i).front()));
     }
@@ -1432,7 +1552,7 @@ void Training::calculateErrorEpoch()
         fileNameForcesTest = strpr("testforces.%06zu.out", epoch);
     }
 
-    // Calculate RMSE and write comparison files.
+    // Calculate error and write comparison files.
     calculateError(true,
                    identifier,
                    fileNameEnergiesTrain,
@@ -1452,7 +1572,9 @@ void Training::writeWeights(string const fileNameFormat) const
         string fileName = strpr(fileNameFormat.c_str(),
                                 elements.at(i).getAtomicNumber());
         file.open(fileName.c_str());
-        elements.at(i).neuralNetwork->writeConnections(file);
+        // Attention: need alternative (old) ordering scheme here for
+        // backward compatibility!
+        elements.at(i).neuralNetwork->writeConnectionsAO(file);
         file.close();
     }
 
@@ -1629,7 +1751,7 @@ void Training::writeNeuronStatistics(string const fileName) const
     vector<string> colInfo;
     vector<size_t> colSize;
     title.push_back("Statistics for individual neurons gathered during "
-                    "RMSE calculation.");
+                    "error calculation.");
     colSize.push_back(10);
     colName.push_back("element");
     colInfo.push_back("Element index.");
@@ -1743,24 +1865,31 @@ void Training::writeUpdaterStatus(bool append,
 
     for (size_t i = 0; i < numUpdaters; ++i)
     {
-        string fileName;
-        if (updateStrategy == US_COMBINED)
-        {
-            fileName = strpr(fileNameFormat.c_str(), 0);
-        }
-        else if (updateStrategy == US_ELEMENT)
+        if (myRank == 0)
         {
-            fileName = strpr(fileNameFormat.c_str(),
-                             elementMap.atomicNumber(i));
+            string fileName;
+            if (updateStrategy == US_COMBINED)
+            {
+                fileName = strpr(fileNameFormat.c_str(), 0);
+            }
+            else if (updateStrategy == US_ELEMENT)
+            {
+                fileName = strpr(fileNameFormat.c_str(),
+                                 elementMap.atomicNumber(i));
+            }
+            if (append) file.open(fileName.c_str(), ofstream::app);
+            else
+            {
+                file.open(fileName.c_str());
+                appendLinesToFile(file, updaters.at(i)->statusHeader());
+            }
+            file << updaters.at(i)->status(epoch);
+            file.close();
         }
-        if (append) file.open(fileName.c_str(), ofstream::app);
-        else
+        else if (parallelMode == PM_TRAIN_ALL)
         {
-            file.open(fileName.c_str());
-            appendLinesToFile(file, updaters.at(i)->statusHeader());
+            updaters.at(i)->status(epoch);
         }
-        file << updaters.at(i)->status(epoch);
-        file.close();
     }
 
     return;
@@ -1884,7 +2013,7 @@ void Training::loop()
     else if (errorMetricForces == 1) metric = "MAE";
     if (errorMetricEnergies % 2 == 0) peratom = "per atom ";
 
-    log << "The training loop output covers different RMSEs, update and\n";
+    log << "The training loop output covers different errors, update and\n";
    log << "timing information. The following quantities are organized\n";
     log << "according to the matrix scheme below:\n";
     log << "-------------------------------------------------------------------\n";
@@ -1911,7 +2040,7 @@ void Training::loop()
     log << "Abbreviations:\n";
     log << "  p. u. = physical units.\n";
     log << "  i. u. = internal units.\n";
-    log << "Note: RMSEs in internal units (columns 5 + 6) are only present \n";
+    log << "Note: Errors in internal units (columns 5 + 6) are only present \n";
     log << "      if data set normalization is used.\n";
     log << "-------------------------------------------------------------------\n";
     log << " 1 2 3 4 5 6\n";
@@ -1940,8 +2069,9 @@ void Training::loop()
     // Write learning curve.
     if (myRank == 0) writeLearningCurve(false);
 
-    // Write updater status to file.
-    if (myRank == 0) writeUpdaterStatus(false);
+    // Write updater status to file (decoupled KF requires all tasks to
+    // execute the status() function).
+    writeUpdaterStatus(false);
 
     // Write neuron statistics.
     writeNeuronStatisticsEpoch();
@@ -2013,7 +2143,7 @@ void Training::loop()
         if (myRank == 0) writeLearningCurve(true);
 
         // Write updater status to file.
-        if (myRank == 0) writeUpdaterStatus(true);
+        writeUpdaterStatus(true);
 
         // Write neuron statistics.
         writeNeuronStatisticsEpoch();
@@ -2280,6 +2410,22 @@ void Training::update(bool force)
             else
             {
                 nn->calculateDEdc(&(dXdc.at(i).front()));
+                //vector<double> temp(dXdc.at(i).size(), 0.0);
+                //vector<double> tempAO(dXdc.at(i).size(), 0.0);
+                //nn->calculateDEdc(&(temp.front()));
+                //ofstream tf("dedc.out");
+                //size_t count;
+                //count = 0;
+                //sort(temp.begin(), temp.end());
+                //for (auto i : temp) tf << strpr("%zu %16.8E\n", count++, i);
+                //tf.close();
+                //nn->calculateDEdcAO(&(tempAO.front()));
+                //tf.open("dedc.out.ao");
+                //count = 0;
+                //sort(tempAO.begin(), tempAO.end());
+                //for (auto i : tempAO) tf << strpr("%zu %16.8E\n", count++, i);
+                //tf.close();
+                //throw runtime_error("END HERE");
             }
 
             // Finally sum up Jacobian.
             if (updateStrategy == US_ELEMENT) iu = i;
@@ -2715,7 +2861,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(0).at(pos)));
+#else
+            nn->getConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2724,7 +2874,11 @@ void Training::getWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork const* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->getConnections(&(weights.at(i).front()));
+#else
+            nn->getConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
 
@@ -2739,7 +2893,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(0).at(pos)));
+#else
+            nn->setConnectionsAO(&(weights.at(0).at(pos)));
+#endif
             pos += nn->getNumConnections();
         }
     }
@@ -2748,7 +2906,11 @@ void Training::setWeights()
         for (size_t i = 0; i < numElements; ++i)
         {
             NeuralNetwork* const& nn = elements.at(i).neuralNetwork;
+#ifndef ALTERNATIVE_WEIGHT_ORDERING
             nn->setConnections(&(weights.at(i).front()));
+#else
+            nn->setConnectionsAO(&(weights.at(i).front()));
+#endif
         }
     }
 
diff --git a/src/libnnptrain/Training.h b/src/libnnptrain/Training.h
index 1ae678255..93022ebc8 100644
--- a/src/libnnptrain/Training.h
+++ b/src/libnnptrain/Training.h
@@ -110,6 +110,21 @@ class Training : public Dataset
         SM_THRESHOLD
     };
 
+    /// Kalman filter decoupling type (requires UT_KF and US_COMBINED).
+    enum DecouplingType
+    {
+        /// No decoupling, global extended Kalman filter (GEKF).
+        DT_GLOBAL,
+        /// Per-element decoupling, ED-GEKF (doi.org/10.1021/acs.jctc.5b00211).
+        DT_ELEMENT,
+        /// Layer decoupling.
+        DT_LAYER,
+        /// Node decoupling, NDEKF.
+        DT_NODE,
+        /// Full decoupling, FDEKF.
+        DT_FULL
+    };
+
     /// Constructor.
     Training();
     /// Destructor, updater vector needs to be cleaned.
@@ -309,6 +324,8 @@ class Training : public Dataset
     UpdateStrategy updateStrategy;
     /// Selection mode for update candidates.
     SelectionMode selectionMode;
+    /// Kalman filter decoupling type if KF training selected.
+    DecouplingType decouplingType;
     /// If this rank performs weight updates.
     bool hasUpdaters;
     /// If this rank holds structure information.
diff --git a/src/makefile.gnu b/src/makefile.gnu
index 223a03485..ece0147ce 100644
--- a/src/makefile.gnu
+++ b/src/makefile.gnu
@@ -56,3 +56,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.intel b/src/makefile.intel
index 17e61b7a0..4a9a0e94c 100644
--- a/src/makefile.intel
+++ b/src/makefile.intel
@@ -57,3 +57,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/src/makefile.llvm b/src/makefile.llvm
index b748ce08b..30deb9a03 100644
--- a/src/makefile.llvm
+++ b/src/makefile.llvm
@@ -58,3 +58,6 @@ PROJECT_OPTIONS+= -DEIGEN_DONT_PARALLELIZE
 
 # Use improved memory layout for symmetry function derivatives.
 PROJECT_OPTIONS+= -DIMPROVED_SFD_MEMORY
+
+# Use alternative (old) ordering scheme of weights (NDEKF not possible).
+#PROJECT_OPTIONS+= -DALTERNATIVE_WEIGHT_ORDERING
diff --git a/test/cpp/ExampleNeuralNetworx.h b/test/cpp/ExampleNeuralNetworx.h
new file mode 100644
index 000000000..b88cda8ee
--- /dev/null
+++ b/test/cpp/ExampleNeuralNetworx.h
@@ -0,0 +1,215 @@
+#ifndef EXAMPLENEURALNETWORX_H
+#define EXAMPLENEURALNETWORX_H
+
+#include <string>
+#include <vector>
+#include "Example.h"
+#include "BoostDataContainer.h"
+#include "NeuralNetworx.h"
+
+struct ExampleNeuralNetworx : public Example
+{
+    std::size_t numLayers;
+    std::size_t numConnections;
+    std::size_t numWeights;
+    std::size_t numBiases;
+    std::vector<std::size_t> numNeuronsPerLayer;
+    std::vector<std::pair<
+    std::size_t, std::size_t>> limitsLayers;
+    std::vector<std::pair<
+    std::size_t, std::size_t>> limitsNeurons;
+    std::vector<nnp::NeuralNetworx::Activation> activationPerLayer;
+    std::vector<std::string> activationStringPerLayer;
+
+    ExampleNeuralNetworx();
+
+    ExampleNeuralNetworx(std::string name) : numLayers     (0),
+                                             numConnections(0),
+                                             numWeights    (0),
+                                             numBiases     (0)
+    {
+        this->name = name;
+        this->description = std::string("NeuralNetworx example \"")
+                          + this->name
+                          + "\"";
+    }
+};
+
+template<>
+void BoostDataContainer<ExampleNeuralNetworx>::setup()
+{
+    ExampleNeuralNetworx* e = nullptr;
+
+    examples.push_back(ExampleNeuralNetworx("3-layer, single output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {2, 3, 1};
+    e->activationStringPerLayer = {"l", "t", "p"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::TANH,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS};
+    e->numLayers = 3;
+    e->numConnections = 13;
+    e->numWeights = 9;
+    e->numBiases = 4;
+    e->limitsLayers.push_back(std::make_pair(0, 8));
+    e->limitsLayers.push_back(std::make_pair(9, 12));
+    e->limitsNeurons.push_back(std::make_pair(0, 2));
+    e->limitsNeurons.push_back(std::make_pair(3, 5));
+    e->limitsNeurons.push_back(std::make_pair(6, 8));
+    e->limitsNeurons.push_back(std::make_pair(9, 12));
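+    // Consistency check for this 2-3-1 example: 2*3 + 3 = 9 connections into
+    // the hidden layer plus 3*1 + 1 = 4 into the output layer give 13
+    // connections total (9 weights, 4 biases), matching the numbers above.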
+
+    examples.push_back(ExampleNeuralNetworx("4-layer, single output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {10, 20, 15, 1};
+    e->activationStringPerLayer = {"l", "t", "p", "l"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::TANH,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS,
+                             nnp::NeuralNetworx::Activation::IDENTITY};
+    e->numLayers = 4;
+    e->numConnections = 551;
+    e->numWeights = 515;
+    e->numBiases = 36;
+    e->limitsLayers.push_back(std::make_pair(0, 219));
+    e->limitsLayers.push_back(std::make_pair(220, 534));
+    e->limitsLayers.push_back(std::make_pair(535, 550));
+    e->limitsNeurons.push_back(std::make_pair(0, 10)); // layer 2
+    e->limitsNeurons.push_back(std::make_pair(11, 21));
+    e->limitsNeurons.push_back(std::make_pair(22, 32));
+    e->limitsNeurons.push_back(std::make_pair(33, 43));
+    e->limitsNeurons.push_back(std::make_pair(44, 54));
+    e->limitsNeurons.push_back(std::make_pair(55, 65));
+    e->limitsNeurons.push_back(std::make_pair(66, 76));
+    e->limitsNeurons.push_back(std::make_pair(77, 87));
+    e->limitsNeurons.push_back(std::make_pair(88, 98));
+    e->limitsNeurons.push_back(std::make_pair(99, 109));
+    e->limitsNeurons.push_back(std::make_pair(110, 120));
+    e->limitsNeurons.push_back(std::make_pair(121, 131));
+    e->limitsNeurons.push_back(std::make_pair(132, 142));
+    e->limitsNeurons.push_back(std::make_pair(143, 153));
+    e->limitsNeurons.push_back(std::make_pair(154, 164));
+    e->limitsNeurons.push_back(std::make_pair(165, 175));
+    e->limitsNeurons.push_back(std::make_pair(176, 186));
+    e->limitsNeurons.push_back(std::make_pair(187, 197));
+    e->limitsNeurons.push_back(std::make_pair(198, 208));
+    e->limitsNeurons.push_back(std::make_pair(209, 219));
+    e->limitsNeurons.push_back(std::make_pair(220, 240)); // layer 3
+    e->limitsNeurons.push_back(std::make_pair(241, 261));
+    e->limitsNeurons.push_back(std::make_pair(262, 282));
+    e->limitsNeurons.push_back(std::make_pair(283, 303));
+    e->limitsNeurons.push_back(std::make_pair(304, 324));
+    e->limitsNeurons.push_back(std::make_pair(325, 345));
+    e->limitsNeurons.push_back(std::make_pair(346, 366));
+    e->limitsNeurons.push_back(std::make_pair(367, 387));
+    e->limitsNeurons.push_back(std::make_pair(388, 408));
+    e->limitsNeurons.push_back(std::make_pair(409, 429));
+    e->limitsNeurons.push_back(std::make_pair(430, 450));
+    e->limitsNeurons.push_back(std::make_pair(451, 471));
+    e->limitsNeurons.push_back(std::make_pair(472, 492));
+    e->limitsNeurons.push_back(std::make_pair(493, 513));
+    e->limitsNeurons.push_back(std::make_pair(514, 534));
+    e->limitsNeurons.push_back(std::make_pair(535, 550)); // layer 4
+
+    examples.push_back(ExampleNeuralNetworx("10-layer, multiple output"));
+    e = &(examples.back());
+    e->numNeuronsPerLayer = {25, 20, 15, 30, 10, 4, 12, 13, 7, 5};
+    e->activationStringPerLayer = {"l", "c", "e", "g", "h",
+                                   "s", "r", "S", "p", "t"};
+    e->activationPerLayer = {nnp::NeuralNetworx::Activation::IDENTITY,
+                             nnp::NeuralNetworx::Activation::COS,
+                             nnp::NeuralNetworx::Activation::EXP,
+                             nnp::NeuralNetworx::Activation::GAUSSIAN,
+                             nnp::NeuralNetworx::Activation::HARMONIC,
+                             nnp::NeuralNetworx::Activation::LOGISTIC,
+                             nnp::NeuralNetworx::Activation::RELU,
+                             nnp::NeuralNetworx::Activation::REVLOGISTIC,
+                             nnp::NeuralNetworx::Activation::SOFTPLUS,
+                             nnp::NeuralNetworx::Activation::TANH};
+    e->numLayers = 10;
+    e->numConnections = 2036;
+    e->numWeights = 1920;
+    e->numBiases = 116;
+
+    return;
+}
+
+struct ExampleActivation : public Example
+{
+    std::string activation;
+    std::vector<double> x;
+    std::vector<double> f;
+
+    ExampleActivation();
+
+    ExampleActivation(std::string name)
+    {
+        this->name = name;
+        this->description = std::string("Activation example \"")
+                          + this->name
+                          + "\"";
+    }
+};
+
+template<>
+void BoostDataContainer<ExampleActivation>::setup()
+{
+    ExampleActivation* e = nullptr;
+
+    examples.push_back(ExampleActivation("Linear \"l\""));
+    e = &(examples.back());
+    e->activation = "l";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {-1.5, -0.7, 0.0, 0.7, 1.5};
+
+    examples.push_back(ExampleActivation("Hyperbolic Tangent \"t\""));
+    e = &(examples.back());
+    e->activation = "t";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {-9.0514825364486640E-01, -6.0436777711716361E-01, 0.0,
+            6.0436777711716361E-01, 9.0514825364486640E-01};
+
+    examples.push_back(ExampleActivation("Logistic \"s\""));
+    e = &(examples.back());
+    e->activation = "s";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {1.8242552380635635E-01, 3.3181222783183389E-01, 5.0E-01,
+            6.6818777216816616E-01, 8.1757447619364365E-01};
+
+    examples.push_back(ExampleActivation("Softplus \"p\""));
+    e = &(examples.back());
+    e->activation = "p";
+    e->x = {-1.5, -0.7, 0.0, 0.7, 1.5};
+    e->f = {2.0141327798275246E-01, 4.0318604888545784E-01,
+            6.9314718055994529E-01, 1.1031860488854579E+00,
+            1.7014132779827524E+00};
+
+    examples.push_back(ExampleActivation("ReLU \"r\""));
+    e = &(examples.back());
+    e->activation = "r";
+    e->x = {-1.5, -0.7, -0.1, 0.1, 0.7, 1.5};
+    e->f = {0.0, 0.0, 0.0, 0.1, 0.7, 1.5};
+
+    examples.push_back(ExampleActivation("Gaussian \"g\""));
+    e = &(examples.back());
e->activation = "g"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {3.2465246735834974E-01, 7.8270453824186814E-01, 1.0E+00, 7.8270453824186814E-01, 3.2465246735834974E-01}; + + examples.push_back(ExampleActivation("Reverse Logistic \"S\"")); + e = &(examples.back()); + e->activation = "S"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {8.1757447619364365E-01, 6.6818777216816616E-01, 5.0E-01, 3.3181222783183384E-01, 1.8242552380635635E-01}; + + examples.push_back(ExampleActivation("Exponential \"e\"")); + e = &(examples.back()); + e->activation = "e"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {4.4816890703380645E+00, 2.0137527074704766E+00, 1.0E+00, 4.9658530379140953E-01, 2.2313016014842982E-01}; + + examples.push_back(ExampleActivation("Harmonic \"h\"")); + e = &(examples.back()); + e->activation = "h"; + e->x = {-1.5, -0.7, 0.0, 0.7, 1.5}; + e->f = {2.25E+00, 4.8999999999999994E-01, 0.0, 4.8999999999999994E-01, 2.25E+00}; + + return; +} + +#endif diff --git a/test/cpp/Example_nnp_train.h b/test/cpp/Example_nnp_train.h index fe4aa3d22..88ed24a76 100644 --- a/test/cpp/Example_nnp_train.h +++ b/test/cpp/Example_nnp_train.h @@ -28,13 +28,13 @@ void BoostDataContainer::setup() e->rmseForcesTrain = 3.77302629E-02; e->rmseForcesTest = 2.25135617E-01; - //examples.push_back(Example_nnp_train("H2O_RPBE-D3")); - //e = &(examples.back()); - //e->lastEpoch = 10; - //e->rmseEnergyTrain = 1.02312914E-04; - //e->rmseEnergyTest = 5.28837597E-04; - //e->rmseForcesTrain = 1.67069652E-02; - //e->rmseForcesTest = 1.07708383E-02; + examples.push_back(Example_nnp_train("H2O_RPBE-D3")); + e = &(examples.back()); + e->lastEpoch = 5; + e->rmseEnergyTrain = 9.19395999E-04; + e->rmseEnergyTest = 7.42520683E-04; + e->rmseForcesTrain = 2.40682415E-02; + e->rmseForcesTest = 1.44776060E-02; return; } diff --git a/test/cpp/makefile b/test/cpp/makefile index 9fcc5f411..3ac311611 100644 --- a/test/cpp/makefile +++ b/test/cpp/makefile @@ -34,10 +34,10 @@ OPTIONS+=$(PROJECT_OPTIONS) DEBUG=$(PROJECT_DEBUG) $(PROJECT_TEST) # Extra include paths for compiling. -INCLUDES=-I./ -I$(PROJECT_DIR)/src/libnnp -I$(PROJECT_DIR)/src/libnnptrain +INCLUDES=-I./ -I$(PROJECT_DIR)/src/libnnp -I$(PROJECT_DIR)/src/libnnptrain -I$(PROJECT_EIGEN) # Extra flags for linking. 
-LDFLAGS=$(PROJECT_LIB)/libnnp.a $(PROJECT_LIB)/libnnptrain.a -lboost_system -lboost_unit_test_framework -lboost_filesystem
+LDFLAGS=$(PROJECT_LIB)/libnnp.a $(PROJECT_LIB)/libnnptrain.a -lboost_system -lboost_unit_test_framework -lboost_filesystem $(PROJECT_LDFLAGS_BLAS)
 
 ###############################################################################
diff --git a/test/cpp/test_NeuralNetworx.cpp b/test/cpp/test_NeuralNetworx.cpp
new file mode 100644
index 000000000..1b9b50df6
--- /dev/null
+++ b/test/cpp/test_NeuralNetworx.cpp
@@ -0,0 +1,306 @@
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE NeuralNetworx
+#include <boost/test/unit_test.hpp>
+#include <boost/test/data/test_case.hpp>
+#include <boost/filesystem.hpp>
+#include "fileHelpers.h"
+#include "ExampleNeuralNetworx.h"
+
+#include "NeuralNetworx.h"
+#include "utility.h"
+#include <algorithm> // std::fill, std::generate
+#include <cstddef>   // std::size_t
+#include <limits>
+#include <numeric>   // std::iota
+#include <random>    // std::mt19937_64
+#include <string>    // std::string
+#include <vector>    // std::vector
+
+using namespace std;
+using namespace nnp;
+
+namespace bdata = boost::unit_test::data;
+namespace bfs = boost::filesystem;
+
+double const accuracy = 10.0 * numeric_limits<double>::epsilon();
+double const delta = 1.0E-5;
+
+BoostDataContainer<ExampleNeuralNetworx> containerNN;
+BoostDataContainer<ExampleActivation> containerActivation;
+
+BOOST_AUTO_TEST_SUITE(UnitTests)
+
+BOOST_DATA_TEST_CASE(Initialize_CorrectNetworkArchitecture,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    BOOST_REQUIRE_EQUAL(nn.getNumLayers(), example.numLayers);
+    BOOST_REQUIRE_EQUAL(nn.getNumConnections(), example.numConnections);
+    BOOST_REQUIRE_EQUAL(nn.getNumWeights(), example.numWeights);
+    BOOST_REQUIRE_EQUAL(nn.getNumBiases(), example.numBiases);
+}
+
+BOOST_DATA_TEST_CASE(InitializeWithStrings_CorrectNetworkArchitecture,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer,
+                     example.activationStringPerLayer);
+
+    BOOST_REQUIRE_EQUAL(nn.getNumLayers(), example.numLayers);
+    BOOST_REQUIRE_EQUAL(nn.getNumConnections(), example.numConnections);
+    BOOST_REQUIRE_EQUAL(nn.getNumWeights(), example.numWeights);
+    BOOST_REQUIRE_EQUAL(nn.getNumBiases(), example.numBiases);
+}
+
+BOOST_DATA_TEST_CASE(SetAndGetConnections_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co(nn.getNumConnections(), 0);
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnections(ci);
+    nn.getConnections(co);
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+}
+
+BOOST_DATA_TEST_CASE(SetAndGetConnectionsAO_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co(nn.getNumConnections(), 0);
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnectionsAO(ci);
+    nn.getConnectionsAO(co);
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+}
+
+BOOST_DATA_TEST_CASE(SetAndWriteConnections_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co;
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnections(ci);
+    ofstream ofile;
+    ofile.open("weights.data");
+    nn.writeConnections(ofile);
+    ofile.close();
+    ifstream ifile;
+    ifile.open("weights.data");
+    BOOST_REQUIRE(ifile.is_open());
+    string line;
+    while (getline(ifile, line))
+    {
+        if (line.at(0) != '#')
+        {
+            vector<string> splitLine = split(reduce(line));
+            co.push_back(atof(splitLine.at(0).c_str()));
+        }
+    }
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+    bfs::remove_all("weights.data");
+}
+
+BOOST_DATA_TEST_CASE(SetAndWriteConnectionsAO_OriginalValuesRestored,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+
+    vector<double> ci(nn.getNumConnections(), 0);
+    vector<double> co;
+    iota(ci.begin(), ci.end(), 0);
+    nn.setConnectionsAO(ci);
+    ofstream ofile;
+    ofile.open("weights.data");
+    nn.writeConnectionsAO(ofile);
+    ofile.close();
+    ifstream ifile;
+    ifile.open("weights.data");
+    BOOST_REQUIRE(ifile.is_open());
+    string line;
+    while (getline(ifile, line))
+    {
+        if (line.at(0) != '#')
+        {
+            vector<string> splitLine = split(reduce(line));
+            co.push_back(atof(splitLine.at(0).c_str()));
+        }
+    }
+
+    BOOST_CHECK_EQUAL_COLLECTIONS(ci.begin(), ci.end(), co.begin(), co.end());
+    bfs::remove_all("weights.data");
+}
+
+BOOST_DATA_TEST_CASE(SingleNeuronActivation_AnalyticNumericDerivativesMatch,
+                     bdata::make(containerActivation.examples),
+                     example)
+{
+    // Set up NN with only two neurons.
+    NeuralNetworx nn(vector<size_t>{1, 1},
+                     vector<string>{"l", example.activation});
+    nn.setConnections(vector<double>{1.0, 0.0});
+
+    for (size_t i = 0; i < example.x.size(); ++i)
+    {
+        nn.propagate(vector<double>{example.x.at(i)});
+        vector<double> output;
+        nn.getOutput(output);
+        BOOST_TEST_INFO(example.name + " x(" << i
+                        << ") = " << example.x.at(i));
+        BOOST_REQUIRE_SMALL(output.at(0) - example.f.at(i), accuracy);
+
+        nn.propagate(vector<double>{example.x.at(i) - delta});
+        double const yLeft = nn.getNeuronProperties(1, 0).at("y");
+
+        nn.propagate(vector<double>{example.x.at(i) + delta});
+        double const yRight = nn.getNeuronProperties(1, 0).at("y");
+
+        nn.propagate(vector<double>{example.x.at(i)});
+        auto yCenterProperties = nn.getNeuronProperties(1, 0);
+        double const y = yCenterProperties.at("y");
+        BOOST_REQUIRE_SMALL(y - example.f.at(i), accuracy);
+
+        double const dydx = yCenterProperties.at("dydx");
+        double const dydxNum = (yRight - yLeft) / (2.0 * delta);
+        BOOST_REQUIRE_SMALL(dydx - dydxNum, 10 * delta);
+
+        double const d2ydx2 = yCenterProperties.at("d2ydx2");
+        double const d2ydx2Num = (yRight - 2.0 * y + yLeft) / (delta * delta);
+
+        //cout << strpr("yl        : %23.16E\n", yLeft);
+        //cout << strpr("y         : %23.16E\n", y);
+        //cout << strpr("f         : %23.16E\n", example.f.at(i));
+        //cout << strpr("yr        : %23.16E\n", yRight);
+        //cout << strpr("yr-2y+yl  : %23.16E\n", yRight - 2.0 * y + yLeft);
+        //cout << strpr("delta**2  : %23.16E\n", delta * delta);
+        //cout << "\n";
+        //cout << strpr("dydx      : %23.16E\n", dydx);
+        //cout << strpr("dydxNum   : %23.16E\n", dydxNum);
+        //cout << "\n";
+        //cout << strpr("d2ydx2    : %23.16E\n", d2ydx2);
+        //cout << strpr("d2ydx2Num : %23.16E\n", d2ydx2Num);
+
+        BOOST_REQUIRE_SMALL(d2ydx2 - d2ydx2Num, 10 * delta);
+    }
+}
+
+BOOST_AUTO_TEST_CASE(ComputeInputDerivative_AnalyticNumericDerivativesMatch)
+{
+    size_t numInput = 3;
+    size_t numOutput = 4;
+    NeuralNetworx nn(vector<size_t>{numInput, 4, 5, numOutput},
+                     vector<string>{"l", "p", "p", "p"});
+
+    mt19937_64 rng(0);
+    uniform_real_distribution<> distribution(-1.0, 1.0);
+    auto generator = [&distribution, &rng]() { return distribution(rng); };
+
+    vector<double> connections(nn.getNumConnections());
+    generate(connections.begin(), connections.end(), generator);
+    nn.setConnections(connections);
+
+    vector<double> input(numInput);
+    generate(input.begin(), input.end(), generator);
+    nn.setInput(input);
+
+    nn.propagate(true);
+    vector<vector<double>> derivInput;
+    nn.getDerivInput(derivInput);
+
+    vector<double> outputLeft;
+    vector<double> outputRight;
+    vector<vector<double>> derivInputNum(derivInput.size());
+    for (size_t i = 0; i < numInput; ++i)
+    {
+        vector<double> tmpInput = input;
+        tmpInput.at(i) -= delta;
+        nn.setInput(tmpInput);
+        nn.propagate();
+        nn.getOutput(outputLeft);
+
+        tmpInput.at(i) += 2.0 * delta;
+        nn.setInput(tmpInput);
+        nn.propagate();
+        nn.getOutput(outputRight);
+        for (size_t o = 0; o < numOutput; ++o)
+        {
+            derivInputNum.at(o).push_back(
+                (outputRight.at(o) - outputLeft.at(o)) / (2.0 * delta));
+        }
+    }
+
+    for (size_t o = 0; o < numOutput; ++o)
+    {
+        for (size_t i = 0; i < numInput; ++i)
+        {
+            BOOST_TEST_INFO("dy_out(" << o << ")/dy_in(" << i << ")");
+            BOOST_REQUIRE_SMALL(derivInput.at(o).at(i)
+                                - derivInputNum.at(o).at(i),
+                                10 * delta);
+        }
+    }
+
+    //for (auto l : nn.info()) cout << l;
+
+    //for (auto on : derivInputNum)
+    //{
+    //    for (auto in : on)
+    //    {
+    //        cout << in << " ";
+    //    }
+    //    cout << "\n";
+    //}
+
+    //cout << "\n";
+
+    //for (auto on : derivInput)
+    //{
+    //    for (auto in : on)
+    //    {
+    //        cout << in << " ";
+    //    }
+    //    cout << "\n";
+    //}
+}
+
+BOOST_DATA_TEST_CASE(GetLayerLimits_CorrectLimits,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+    auto limits = nn.getLayerLimits();
+    if (example.limitsLayers.size() > 0)
+    {
+        BOOST_REQUIRE_EQUAL(limits.size(), example.limitsLayers.size());
+    }
+}
+
+BOOST_DATA_TEST_CASE(GetNeuronLimits_CorrectLimits,
+                     bdata::make(containerNN.examples),
+                     example)
+{
+    NeuralNetworx nn(example.numNeuronsPerLayer, example.activationPerLayer);
+    auto limits = nn.getNeuronLimits();
+    if (example.limitsNeurons.size() > 0)
+    {
+        //BOOST_REQUIRE_EQUAL(limits.size(), example.limitsNeurons.size());
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()