From 60aa2bd583d757de03bbb13900d24e3b2b3a5b46 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 4 Oct 2017 17:37:56 +0200 Subject: [PATCH 01/23] small improvements: better random initialization, use of stdlib instead of external crates, bugfixes added L2 reularization replaced sigmoid by ReLU activation --- Cargo.toml | 21 +++------------- README.md | 9 +++---- src/lib.rs | 72 +++++++++++++++++++++++++++++++++--------------------- 3 files changed, 52 insertions(+), 50 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0073463..178f8aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,21 +1,8 @@ [package] - name = "nn" -version = "0.1.6" -authors = ["Jack Montgomery "] -repository = "https://github.com/jackm321/RustNN" -documentation = "https://jackm321.github.io/RustNN/doc/nn/" -license = "Apache-2.0" -readme = "README.md" - -description = """ -A multilayer feedforward backpropagation neural network library -""" - -keywords = ["nn", "neural-network", "classifier", "backpropagation", - "machine-learning"] +version = "0.6.0" +authors = ["https://github.com/jackm321/RustNN"] [dependencies] -rand = "0.3.7" -rustc-serialize = "0.3.12" -time = "0.1.24" +rand = "0.3.*" +rustc-serialize = "0.3.*" diff --git a/README.md b/README.md index 5eb76f2..9baa924 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,8 @@ # RustNN -[![Build Status](https://travis-ci.org/jackm321/RustNN.svg?branch=master)](https://travis-ci.org/jackm321/RustNN) - An easy to use neural network library written in Rust. -[Crate](https://crates.io/crates/nn) - -[Documentation](https://jackm321.github.io/RustNN/doc/nn/) +For the documentation take a look at the original library. There is only an additional lambda factor for training. ## Description RustNN is a [feedforward neural network ](http://en.wikipedia.org/wiki/Feedforward_neural_network) @@ -15,6 +11,9 @@ generates fully connected multi-layer artificial neural networks that are trained via [backpropagation](http://en.wikipedia.org/wiki/Backpropagation). Networks are trained using an incremental training mode. +## Fork +This fork adds L2 regularization to the original crate and replaces sigmoid by the relu activation function. Additionally, there are a few minor improvements. + ## XOR example This example creates a neural network with `2` nodes in the input layer, diff --git a/src/lib.rs b/src/lib.rs index 45c2a77..2d97bb5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,5 @@ +//! Modified version, originally from: https://github.com/jackm321/RustNN +//! //! An easy to use neural network library written in Rust. //! //! 
# Description @@ -57,19 +59,20 @@ extern crate rand; extern crate rustc_serialize; -extern crate time; use HaltCondition::{ Epochs, MSE, Timer }; use LearningMode::{ Incremental }; use std::iter::{Zip, Enumerate}; use std::slice; +use std::time::{ Duration, Instant }; use rustc_serialize::json; -use time::{ Duration, PreciseTime }; -use rand::Rng; +//use rand::Rng; +use rand::distributions::{Normal, IndependentSample}; -static DEFAULT_LEARNING_RATE: f64 = 0.3f64; -static DEFAULT_MOMENTUM: f64 = 0f64; -static DEFAULT_EPOCHS: u32 = 1000; +const DEFAULT_LEARNING_RATE: f64 = 0.3f64; +const DEFAULT_LAMBDA: f64 = 0.0f64; +const DEFAULT_MOMENTUM: f64 = 0.0f64; +const DEFAULT_EPOCHS: u32 = 1000; /// Specifies when to stop training the network #[derive(Debug, Copy, Clone)] @@ -95,6 +98,7 @@ pub struct Trainer<'a,'b> { examples: &'b [(Vec, Vec)], rate: f64, momentum: f64, + lambda: f64, log_interval: Option, halt_condition: HaltCondition, learning_mode: LearningMode, @@ -117,10 +121,20 @@ impl<'a,'b> Trainer<'a,'b> { self.rate = rate; self } + + /// Specifies the lambda factor for L2 regularization used when training (default is 0.0) + pub fn lambda(&mut self, lambda: f64) -> &mut Trainer<'a,'b> { + if lambda <= 0f64 { + panic!("the lambda value must be a positive number"); + } + + self.lambda = lambda; + self + } /// Specifies the momentum to be used when training (default is `0.0`) pub fn momentum(&mut self, momentum: f64) -> &mut Trainer<'a,'b> { - if momentum <= 0f64 { + if momentum < 0f64 { panic!("momentum must be positive"); } @@ -175,6 +189,7 @@ impl<'a,'b> Trainer<'a,'b> { self.nn.train_details( self.examples, self.rate, + self.lambda, self.momentum, self.log_interval, self.halt_condition @@ -220,10 +235,11 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); + let normal = Normal::new(0.0, (2.0/prev_layer_size as f64).sqrt()); for _ in 0..layer_size { let mut node: Vec = Vec::new(); for _ in 0..prev_layer_size+1 { - let random_weight: f64 = rng.gen_range(-0.5f64, 0.5f64); + let random_weight: f64 = normal.ind_sample(&mut rng); node.push(random_weight); } node.shrink_to_fit(); @@ -257,6 +273,7 @@ impl NN { examples: examples, rate: DEFAULT_LEARNING_RATE, momentum: DEFAULT_MOMENTUM, + lambda: DEFAULT_LAMBDA, log_interval: None, halt_condition: Epochs(DEFAULT_EPOCHS), learning_mode: Incremental, @@ -275,7 +292,7 @@ impl NN { network } - fn train_details(&mut self, examples: &[(Vec, Vec)], rate: f64, momentum: f64, log_interval: Option, + fn train_details(&mut self, examples: &[(Vec, Vec)], rate: f64, lambda: f64, momentum: f64, log_interval: Option, halt_condition: HaltCondition) -> f64 { // check that input and output sizes are correct @@ -290,16 +307,16 @@ impl NN { } } - self.train_incremental(examples, rate, momentum, log_interval, halt_condition) + self.train_incremental(examples, rate, lambda, momentum, log_interval, halt_condition) } - fn train_incremental(&mut self, examples: &[(Vec, Vec)], rate: f64, momentum: f64, log_interval: Option, + fn train_incremental(&mut self, examples: &[(Vec, Vec)], rate: f64, lambda: f64, momentum: f64, log_interval: Option, halt_condition: HaltCondition) -> f64 { let mut prev_deltas = self.make_weights_tracker(0.0f64); let mut epochs = 0u32; let mut training_error_rate = 0f64; - let start_time = PreciseTime::now(); + let start_time = Instant::now(); loop { @@ -321,8 +338,7 @@ impl NN { if training_error_rate <= target_error { break } }, Timer(duration) => { - let now = PreciseTime::now(); - 
if start_time.to(now) >= duration { break } + if start_time.elapsed() >= duration { break } } } } @@ -333,7 +349,7 @@ impl NN { let results = self.do_run(&inputs); let weight_updates = self.calculate_weight_updates(&results, &targets); training_error_rate += calculate_error(&results, &targets); - self.update_weights(&weight_updates, &mut prev_deltas, rate, momentum) + self.update_weights(&weight_updates, &mut prev_deltas, rate, lambda, momentum) } epochs += 1; @@ -348,7 +364,7 @@ impl NN { for (layer_index, layer) in self.layers.iter().enumerate() { let mut layer_results = Vec::new(); for node in layer.iter() { - layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ) + layer_results.push( relu(modified_dotprod(&node, &results[layer_index])) ) } results.push(layer_results); } @@ -356,7 +372,7 @@ impl NN { } // updates all weights in the network - fn update_weights(&mut self, network_weight_updates: &Vec>>, prev_deltas: &mut Vec>>, rate: f64, momentum: f64) { + fn update_weights(&mut self, network_weight_updates: &Vec>>, prev_deltas: &mut Vec>>, rate: f64, lambda: f64, momentum: f64) { for layer_index in 0..self.layers.len() { let mut layer = &mut self.layers[layer_index]; let layer_weight_updates = &network_weight_updates[layer_index]; @@ -367,7 +383,7 @@ impl NN { let weight_update = node_weight_updates[weight_index]; let prev_delta = prev_deltas[layer_index][node_index][weight_index]; let delta = (rate * weight_update) + (momentum * prev_delta); - node[weight_index] += delta; + node[weight_index] = (1.0 - rate * lambda) * node[weight_index] + delta; prev_deltas[layer_index][node_index][weight_index] = delta; } } @@ -382,32 +398,32 @@ impl NN { let layers = &self.layers; let network_results = &results[1..]; // skip the input layer let mut next_layer_nodes: Option<&Vec>> = None; - + for (layer_index, (layer_nodes, layer_results)) in iter_zip_enum(layers, network_results).rev() { let prev_layer_results = &results[layer_index]; let mut layer_errors = Vec::new(); let mut layer_weight_updates = Vec::new(); - - + + for (node_index, (node, &result)) in iter_zip_enum(layer_nodes, layer_results) { let mut node_weight_updates = Vec::new(); - let mut node_error; - + let node_error; + // calculate error for this node if layer_index == layers.len() - 1 { - node_error = result * (1f64 - result) * (targets[node_index] - result); + node_error = (if result > 0.0f64 { 1.0f64 } else { 0.0f64 }) * (targets[node_index] - result); //derivative of activation function appears here } else { let mut sum = 0f64; let next_layer_errors = &network_errors[network_errors.len() - 1]; for (next_node, &next_node_error_data) in next_layer_nodes.unwrap().iter().zip((next_layer_errors).iter()) { sum += next_node[node_index+1] * next_node_error_data; // +1 because the 0th weight is the threshold } - node_error = result * (1f64 - result) * sum; + node_error = (if result > 0.0f64 { 1.0f64 } else { 0.0f64 }) * sum; //derivative of activation function appears here } // calculate weight updates for this node for weight_index in 0..node.len() { - let mut prev_layer_result; + let prev_layer_result; if weight_index == 0 { prev_layer_result = 1f64; // threshold } else { @@ -459,8 +475,8 @@ fn modified_dotprod(node: &Vec, values: &Vec) -> f64 { total } -fn sigmoid(y: f64) -> f64 { - 1f64 / (1f64 + (-y).exp()) +fn relu(y: f64) -> f64 { + y.max(0.0) //below 0 the output ist 0, above it is output=input (linear) } From 28d3ff51d402c6d106ce11674672aa4e796925ee Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 4 Oct 
2017 17:50:18 +0200 Subject: [PATCH 02/23] bugfix --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 2d97bb5..bf879fa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,7 +124,7 @@ impl<'a,'b> Trainer<'a,'b> { /// Specifies the lambda factor for L2 regularization used when training (default is 0.0) pub fn lambda(&mut self, lambda: f64) -> &mut Trainer<'a,'b> { - if lambda <= 0f64 { + if lambda < 0f64 { panic!("the lambda value must be a positive number"); } From 2e3f45d91b12dfce7d9027d9185e14eb2e2994ae Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 00:00:19 +0200 Subject: [PATCH 03/23] replace ReLU by PELU to fix dead gradients --- .travis.yml | 1 - README.md | 2 +- src/lib.rs | 33 +++++++++++++++++++++++++++------ tests/xor.rs | 3 +-- 4 files changed, 29 insertions(+), 10 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 613564f..0000000 --- a/.travis.yml +++ /dev/null @@ -1 +0,0 @@ -language: rust \ No newline at end of file diff --git a/README.md b/README.md index 9baa924..c50f546 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ are trained via [backpropagation](http://en.wikipedia.org/wiki/Backpropagation). Networks are trained using an incremental training mode. ## Fork -This fork adds L2 regularization to the original crate and replaces sigmoid by the relu activation function. Additionally, there are a few minor improvements. +This fork adds L2 regularization to the original crate and replaces sigmoid by the PELU activation function. Additionally, there are a few minor improvements. ## XOR example diff --git a/src/lib.rs b/src/lib.rs index bf879fa..0d0317f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,10 @@ const DEFAULT_LAMBDA: f64 = 0.0f64; const DEFAULT_MOMENTUM: f64 = 0.0f64; const DEFAULT_EPOCHS: u32 = 1000; +const PELU_FACTOR_A: f64 = 1.0f64; +const PELU_FACTOR_B: f64 = 1.0f64; + + /// Specifies when to stop training the network #[derive(Debug, Copy, Clone)] pub enum HaltCondition { @@ -364,7 +368,8 @@ impl NN { for (layer_index, layer) in self.layers.iter().enumerate() { let mut layer_results = Vec::new(); for node in layer.iter() { - layer_results.push( relu(modified_dotprod(&node, &results[layer_index])) ) + layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ) //pelu + //layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ) //sigmoid } results.push(layer_results); } @@ -388,7 +393,6 @@ impl NN { } } } - } // calculates all weight updates by backpropagation @@ -411,14 +415,18 @@ impl NN { // calculate error for this node if layer_index == layers.len() - 1 { - node_error = (if result > 0.0f64 { 1.0f64 } else { 0.0f64 }) * (targets[node_index] - result); //derivative of activation function appears here + let act_deriv = if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }; //pelu + //let act_deriv = result * (1.0 - result); //sigmoid + node_error = act_deriv * (targets[node_index] - result); } else { let mut sum = 0f64; let next_layer_errors = &network_errors[network_errors.len() - 1]; for (next_node, &next_node_error_data) in next_layer_nodes.unwrap().iter().zip((next_layer_errors).iter()) { sum += next_node[node_index+1] * next_node_error_data; // +1 because the 0th weight is the threshold } - node_error = (if result > 0.0f64 { 1.0f64 } else { 0.0f64 }) * sum; //derivative of activation function appears here + let act_deriv = if 
result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }; //pelu + //let act_deriv = result * (1.0 - result); //sigmoid + node_error = act_deriv * sum; } // calculate weight updates for this node @@ -475,8 +483,21 @@ fn modified_dotprod(node: &Vec, values: &Vec) -> f64 { total } -fn relu(y: f64) -> f64 { - y.max(0.0) //below 0 the output ist 0, above it is output=input (linear) +#[allow(dead_code)] +fn sigmoid(y: f64) -> f64 { + 1f64 / (1f64 + (-y).exp()) +} + +#[allow(dead_code)] +fn pelu(y: f64) -> f64 { + if y < 0.0 //PELU activation + { + PELU_FACTOR_A * ((y / PELU_FACTOR_B).exp() - 1.0) + } + else + { + (PELU_FACTOR_A / PELU_FACTOR_B) * y + } } diff --git a/tests/xor.rs b/tests/xor.rs index 4ef51eb..c93110f 100644 --- a/tests/xor.rs +++ b/tests/xor.rs @@ -1,5 +1,4 @@ extern crate nn; -extern crate time; use nn::{NN, HaltCondition, LearningMode}; @@ -15,7 +14,7 @@ fn xor_4layers() { // create a new neural network let mut net1 = NN::new(&[2,3,3,1]); - + // train the network net1.train(&examples) .log_interval(Some(1000)) From 2616bc394fc8a872cbdaae13d95767b3c01dc39a Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 01:39:24 +0200 Subject: [PATCH 04/23] little change to initialization --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 0d0317f..55e600e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -239,7 +239,7 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); - let normal = Normal::new(0.0, (2.0/prev_layer_size as f64).sqrt()); + let normal = Normal::new(0.0, (9.0/prev_layer_size as f64).sqrt()); for _ in 0..layer_size { let mut node: Vec = Vec::new(); for _ in 0..prev_layer_size+1 { From 1e5e4008c03ebaa7356fa81863e9cccb6c66a9be Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 02:00:12 +0200 Subject: [PATCH 05/23] provide both Sigmoid and PELU --- README.md | 3 ++- src/lib.rs | 38 ++++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index c50f546..f81b244 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ are trained via [backpropagation](http://en.wikipedia.org/wiki/Backpropagation). Networks are trained using an incremental training mode. ## Fork -This fork adds L2 regularization to the original crate and replaces sigmoid by the PELU activation function. Additionally, there are a few minor improvements. +This fork adds L2 regularization and PELU activation to the original crate. Additionally, there are a few minor improvements. +Lambda can be set just like the learning rate. The activation function gets set in NN::new as second parameter. ## XOR example diff --git a/src/lib.rs b/src/lib.rs index 55e600e..162aa68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -78,6 +78,15 @@ const PELU_FACTOR_A: f64 = 1.0f64; const PELU_FACTOR_B: f64 = 1.0f64; +/// Specifies the activation function +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum Activation { + /// Sigmoid activation + Sigmoid, + /// PELU activation + PELU, +} + /// Specifies when to stop training the network #[derive(Debug, Copy, Clone)] pub enum HaltCondition { @@ -207,16 +216,17 @@ impl<'a,'b> Trainer<'a,'b> { pub struct NN { layers: Vec>>, num_inputs: u32, + activation: u32, } impl NN { - - /// Each number in the `layers_sizes` parameter specifies a + /// Each number in the `layers_sizes` parameter specifies a /// layer in the network. 
The number itself is the number of nodes in that /// layer. The first number is the input layer, the last /// number is the output layer, and all numbers between the first and /// last are hidden layers. There must be at least two layers in the network. - pub fn new(layers_sizes: &[u32]) -> NN { + /// The activation function can be Sigmoid or PELU. + pub fn new(layers_sizes: &[u32], activation: Activation) -> NN { let mut rng = rand::thread_rng(); if layers_sizes.len() < 2 { @@ -254,7 +264,7 @@ impl NN { prev_layer_size = layer_size; } layers.shrink_to_fit(); - NN { layers: layers, num_inputs: first_layer_size } + NN { layers: layers, num_inputs: first_layer_size, activation: if activation == Activation::Sigmoid { 0 } else { 1 } } } /// Runs the network on an input and returns a vector of the results. @@ -368,8 +378,10 @@ impl NN { for (layer_index, layer) in self.layers.iter().enumerate() { let mut layer_results = Vec::new(); for node in layer.iter() { - layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ) //pelu - //layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ) //sigmoid + match self.activation { + 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid + _ => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu + } } results.push(layer_results); } @@ -415,8 +427,10 @@ impl NN { // calculate error for this node if layer_index == layers.len() - 1 { - let act_deriv = if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }; //pelu - //let act_deriv = result * (1.0 - result); //sigmoid + let act_deriv = match self.activation { + 0 => result * (1.0 - result), //sigmoid + _ => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu + }; node_error = act_deriv * (targets[node_index] - result); } else { let mut sum = 0f64; @@ -424,8 +438,10 @@ impl NN { for (next_node, &next_node_error_data) in next_layer_nodes.unwrap().iter().zip((next_layer_errors).iter()) { sum += next_node[node_index+1] * next_node_error_data; // +1 because the 0th weight is the threshold } - let act_deriv = if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }; //pelu - //let act_deriv = result * (1.0 - result); //sigmoid + let act_deriv = match self.activation { + 0 => result * (1.0 - result), //sigmoid + _ => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu + }; node_error = act_deriv * sum; } @@ -483,12 +499,10 @@ fn modified_dotprod(node: &Vec, values: &Vec) -> f64 { total } -#[allow(dead_code)] fn sigmoid(y: f64) -> f64 { 1f64 / (1f64 + (-y).exp()) } -#[allow(dead_code)] fn pelu(y: f64) -> f64 { if y < 0.0 //PELU activation { From ced89e1ff73956fe00a018c670456839d77bdb90 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 02:53:01 +0200 Subject: [PATCH 06/23] replace PELU by SELU --- README.md | 2 +- src/lib.rs | 25 +++++++++++++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index f81b244..079e2ff 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ are trained via [backpropagation](http://en.wikipedia.org/wiki/Backpropagation). Networks are trained using an incremental training mode. ## Fork -This fork adds L2 regularization and PELU activation to the original crate. Additionally, there are a few minor improvements. 
+This fork adds L2 regularization and SELU activation to the original crate. Additionally, there are a few minor improvements. Lambda can be set just like the learning rate. The activation function gets set in NN::new as second parameter. ## XOR example diff --git a/src/lib.rs b/src/lib.rs index 162aa68..f1a3e51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,8 +74,9 @@ const DEFAULT_LAMBDA: f64 = 0.0f64; const DEFAULT_MOMENTUM: f64 = 0.0f64; const DEFAULT_EPOCHS: u32 = 1000; -const PELU_FACTOR_A: f64 = 1.0f64; -const PELU_FACTOR_B: f64 = 1.0f64; +//values for a (0,1) distribution (so (-1, 1) interval) +const SELU_FACTOR_A: f64 = 1.0507f64; //greater than 1, lambda in https://arxiv.org/pdf/1706.02515.pdf +const SELU_FACTOR_B: f64 = 1.6733f64; //alpha in https://arxiv.org/pdf/1706.02515.pdf /// Specifies the activation function @@ -83,8 +84,8 @@ const PELU_FACTOR_B: f64 = 1.0f64; pub enum Activation { /// Sigmoid activation Sigmoid, - /// PELU activation - PELU, + /// SELU activation + SELU, } /// Specifies when to stop training the network @@ -225,7 +226,7 @@ impl NN { /// layer. The first number is the input layer, the last /// number is the output layer, and all numbers between the first and /// last are hidden layers. There must be at least two layers in the network. - /// The activation function can be Sigmoid or PELU. + /// The activation function can be Sigmoid or SELU. Important: SELU optimized for (-1,1) interval pub fn new(layers_sizes: &[u32], activation: Activation) -> NN { let mut rng = rand::thread_rng(); @@ -380,7 +381,7 @@ impl NN { for node in layer.iter() { match self.activation { 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid - _ => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu + _ => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu } } results.push(layer_results); @@ -429,7 +430,7 @@ impl NN { if layer_index == layers.len() - 1 { let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu + _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_B }, //selu }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -440,7 +441,7 @@ impl NN { } let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu + _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_B }, //selu }; node_error = act_deriv * sum; } @@ -503,14 +504,14 @@ fn sigmoid(y: f64) -> f64 { 1f64 / (1f64 + (-y).exp()) } -fn pelu(y: f64) -> f64 { - if y < 0.0 //PELU activation +fn selu(y: f64) -> f64 { + SELU_FACTOR_A * if y <= 0.0 //SELU activation { - PELU_FACTOR_A * ((y / PELU_FACTOR_B).exp() - 1.0) + SELU_FACTOR_B * y.exp() - SELU_FACTOR_B } else { - (PELU_FACTOR_A / PELU_FACTOR_B) * y + y } } From 83ce74fe3bde83dc59a04ec17b1f73d2536623f0 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 10:59:54 +0200 Subject: [PATCH 07/23] MSRA initialization --- src/lib.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f1a3e51..e07a3d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -250,12 +250,19 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); - let 
normal = Normal::new(0.0, (9.0/prev_layer_size as f64).sqrt()); + let normal = Normal::new(0.0, (1.0/prev_layer_size as f64).sqrt()); for _ in 0..layer_size { let mut node: Vec = Vec::new(); - for _ in 0..prev_layer_size+1 { - let random_weight: f64 = normal.ind_sample(&mut rng); - node.push(random_weight); + for i in 0..prev_layer_size+1 { + if i == 0 //threshold aka bias + { + node.push(0.0); + } + else + { + let random_weight: f64 = normal.ind_sample(&mut rng); + node.push(random_weight); + } } node.shrink_to_fit(); layer.push(node) From 694dc42747f5e6df83c995ad92adf0b0cb317c7b Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 11:04:04 +0200 Subject: [PATCH 08/23] MSRA initialization --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index e07a3d0..025caf0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -250,7 +250,7 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); - let normal = Normal::new(0.0, (1.0/prev_layer_size as f64).sqrt()); + let normal = Normal::new(0.0, (2.0/prev_layer_size as f64).sqrt()); for _ in 0..layer_size { let mut node: Vec = Vec::new(); for i in 0..prev_layer_size+1 { From 90df92351c9e3c4e542cb24cf2471df013a08673 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 11:10:06 +0200 Subject: [PATCH 09/23] SELU deviation fix --- src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 025caf0..11cbbf8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -437,7 +437,7 @@ impl NN { if layer_index == layers.len() - 1 { let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_B }, //selu + _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -448,7 +448,7 @@ impl NN { } let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_B }, //selu + _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu }; node_error = act_deriv * sum; } From fdfd563cddaebab4d4ea12b994f30d5964d3e337 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 12:50:39 +0200 Subject: [PATCH 10/23] fixes, PELU and LReLU added --- src/lib.rs | 77 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 11cbbf8..ce7877c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -69,14 +69,22 @@ use rustc_serialize::json; //use rand::Rng; use rand::distributions::{Normal, IndependentSample}; -const DEFAULT_LEARNING_RATE: f64 = 0.3f64; -const DEFAULT_LAMBDA: f64 = 0.0f64; -const DEFAULT_MOMENTUM: f64 = 0.0f64; -const DEFAULT_EPOCHS: u32 = 1000; +const DEFAULT_LEARNING_RATE:f64 = 0.3; +const DEFAULT_LAMBDA:f64 = 0.0; +const DEFAULT_MOMENTUM:f64 = 0.0; +const DEFAULT_EPOCHS:u32 = 1000; -//values for a (0,1) distribution (so (-1, 1) interval) -const SELU_FACTOR_A: f64 = 1.0507f64; //greater than 1, lambda in https://arxiv.org/pdf/1706.02515.pdf -const SELU_FACTOR_B: f64 = 1.6733f64; //alpha in https://arxiv.org/pdf/1706.02515.pdf +//values for a (0,1) distribution (so (-1, 1) interval in standard deviation) +//const SELU_FACTOR_A:f64 = 1.0507; //greater than 1, lambda in https://arxiv.org/pdf/1706.02515.pdf 
+//const SELU_FACTOR_B:f64 = 1.6733; //alpha in https://arxiv.org/pdf/1706.02515.pdf +//values for a (0,2) distribution (so (-2, 2) interval in standard deviation) +const SELU_FACTOR_A:f64 = 1.06071; //greater than 1, lambda in https://arxiv.org/pdf/1706.02515.pdf +const SELU_FACTOR_B:f64 = 1.97126; //alpha in https://arxiv.org/pdf/1706.02515.pdf + +const PELU_FACTOR_A:f64 = 2.0; +const PELU_FACTOR_B:f64 = 10.0; + +const LRELU_FACTOR:f64 = 0.33; /// Specifies the activation function @@ -86,6 +94,10 @@ pub enum Activation { Sigmoid, /// SELU activation SELU, + /// PELU activation + PELU, + /// Leaky ReLU activation + LRELU, } /// Specifies when to stop training the network @@ -226,7 +238,8 @@ impl NN { /// layer. The first number is the input layer, the last /// number is the output layer, and all numbers between the first and /// last are hidden layers. There must be at least two layers in the network. - /// The activation function can be Sigmoid or SELU. Important: SELU optimized for (-1,1) interval + /// The activation function can be Sigmoid, SELU, PELU or LRELU. + /// Important: Take care of inputs/outputs for the individual activation functions! pub fn new(layers_sizes: &[u32], activation: Activation) -> NN { let mut rng = rand::thread_rng(); @@ -250,7 +263,7 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); - let normal = Normal::new(0.0, (2.0/prev_layer_size as f64).sqrt()); + let normal = Normal::new(0.0, (1.0 / prev_layer_size as f64).sqrt()); //2.0 / prev for _ in 0..layer_size { let mut node: Vec = Vec::new(); for i in 0..prev_layer_size+1 { @@ -272,7 +285,13 @@ impl NN { prev_layer_size = layer_size; } layers.shrink_to_fit(); - NN { layers: layers, num_inputs: first_layer_size, activation: if activation == Activation::Sigmoid { 0 } else { 1 } } + let act = match activation { + Activation::Sigmoid => 0, + Activation::SELU => 1, + Activation::PELU => 2, + Activation::LRELU => 3, + }; + NN { layers: layers, num_inputs: first_layer_size, activation: act } } /// Runs the network on an input and returns a vector of the results. 
@@ -388,7 +407,9 @@ impl NN { for node in layer.iter() { match self.activation { 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid - _ => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu + 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu + 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu + _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu } } results.push(layer_results); @@ -437,7 +458,9 @@ impl NN { if layer_index == layers.len() - 1 { let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu + 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu + 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu + _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -448,7 +471,9 @@ impl NN { } let act_deriv = match self.activation { 0 => result * (1.0 - result), //sigmoid - _ => if result > 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu + 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu + 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu + _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu }; node_error = act_deriv * sum; } @@ -511,8 +536,8 @@ fn sigmoid(y: f64) -> f64 { 1f64 / (1f64 + (-y).exp()) } -fn selu(y: f64) -> f64 { - SELU_FACTOR_A * if y <= 0.0 //SELU activation +fn selu(y: f64) -> f64 { //SELU activation + SELU_FACTOR_A * if y < 0.0 { SELU_FACTOR_B * y.exp() - SELU_FACTOR_B } @@ -522,6 +547,28 @@ fn selu(y: f64) -> f64 { } } +fn pelu(y: f64) -> f64 { //PELU activation + if y < 0.0 + { + SELU_FACTOR_A * (y / SELU_FACTOR_B).exp() - SELU_FACTOR_A + } + else + { + (PELU_FACTOR_A / PELU_FACTOR_B) * y + } +} + +fn lrelu(y: f64) -> f64 { //LRELU activation + if y < 0.0 + { + LRELU_FACTOR * y + } + else + { + y + } +} + // takes two arrays and enumerates the iterator produced by zipping each of // their iterators together From b911eaca4e035e3f5cb9e86171c8afaf518a0e3b Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 12:56:21 +0200 Subject: [PATCH 11/23] readme change for previous commit --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 079e2ff..2b4c55e 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ An easy to use neural network library written in Rust. -For the documentation take a look at the original library. There is only an additional lambda factor for training. +For the documentation take a look at the original library or generate it using "cargo doc". ## Description RustNN is a [feedforward neural network ](http://en.wikipedia.org/wiki/Feedforward_neural_network) @@ -12,7 +12,7 @@ are trained via [backpropagation](http://en.wikipedia.org/wiki/Backpropagation). Networks are trained using an incremental training mode. ## Fork -This fork adds L2 regularization and SELU activation to the original crate. Additionally, there are a few minor improvements. +This fork adds L2 regularization and several activation functions to the original crate. 
Additionally, there are a few minor improvements. Lambda can be set just like the learning rate. The activation function gets set in NN::new as second parameter. ## XOR example From ff45185bb8bf1705e9770b59949f063e823246da Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 13:51:48 +0200 Subject: [PATCH 12/23] split hidden and output activation --- README.md | 2 +- src/lib.rs | 43 ++++++++++++++++++++++++++++++++----------- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 2b4c55e..4fe1694 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Networks are trained using an incremental training mode. ## Fork This fork adds L2 regularization and several activation functions to the original crate. Additionally, there are a few minor improvements. -Lambda can be set just like the learning rate. The activation function gets set in NN::new as second parameter. +Lambda can be set just like the learning rate. The activation functions for hidden and output gets set in NN::new as second and third parameter respectively. ## XOR example diff --git a/src/lib.rs b/src/lib.rs index ce7877c..f24704e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -229,7 +229,8 @@ impl<'a,'b> Trainer<'a,'b> { pub struct NN { layers: Vec>>, num_inputs: u32, - activation: u32, + hid_act: u32, + out_act: u32, } impl NN { @@ -240,7 +241,7 @@ impl NN { /// last are hidden layers. There must be at least two layers in the network. /// The activation function can be Sigmoid, SELU, PELU or LRELU. /// Important: Take care of inputs/outputs for the individual activation functions! - pub fn new(layers_sizes: &[u32], activation: Activation) -> NN { + pub fn new(layers_sizes: &[u32], hidden_activation: Activation, output_activation: Activation) -> NN { let mut rng = rand::thread_rng(); if layers_sizes.len() < 2 { @@ -285,13 +286,21 @@ impl NN { prev_layer_size = layer_size; } layers.shrink_to_fit(); - let act = match activation { + + //set activation functions + let hid_act = match hidden_activation { Activation::Sigmoid => 0, Activation::SELU => 1, Activation::PELU => 2, Activation::LRELU => 3, }; - NN { layers: layers, num_inputs: first_layer_size, activation: act } + let out_act = match output_activation { + Activation::Sigmoid => 0, + Activation::SELU => 1, + Activation::PELU => 2, + Activation::LRELU => 3, + }; + NN { layers: layers, num_inputs: first_layer_size, hid_act: hid_act, out_act: out_act } } /// Runs the network on an input and returns a vector of the results. 
@@ -405,11 +414,23 @@ impl NN { for (layer_index, layer) in self.layers.iter().enumerate() { let mut layer_results = Vec::new(); for node in layer.iter() { - match self.activation { - 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid - 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu - 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu - _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + if layer_index == self.layers.len()-1 //output layer + { + match self.out_act { + 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid + 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu + 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu + _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + } + } + else + { + match self.hid_act { + 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid + 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu + 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu + _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + } } } results.push(layer_results); @@ -456,7 +477,7 @@ impl NN { // calculate error for this node if layer_index == layers.len() - 1 { - let act_deriv = match self.activation { + let act_deriv = match self.out_act { //output activation 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu @@ -469,7 +490,7 @@ impl NN { for (next_node, &next_node_error_data) in next_layer_nodes.unwrap().iter().zip((next_layer_errors).iter()) { sum += next_node[node_index+1] * next_node_error_data; // +1 because the 0th weight is the threshold } - let act_deriv = match self.activation { + let act_deriv = match self.hid_act { //hidden activation 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu From 53704957b8ccbc9ceab9843f6d0117b77c89bf4a Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 13:53:11 +0200 Subject: [PATCH 13/23] fit test to updates NN class --- tests/xor.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/xor.rs b/tests/xor.rs index c93110f..86fbf0f 100644 --- a/tests/xor.rs +++ b/tests/xor.rs @@ -1,6 +1,6 @@ extern crate nn; -use nn::{NN, HaltCondition, LearningMode}; +use nn::{NN, HaltCondition, LearningMode, Activation}; #[test] fn xor_4layers() { @@ -13,7 +13,7 @@ fn xor_4layers() { ]; // create a new neural network - let mut net1 = NN::new(&[2,3,3,1]); + let mut net1 = NN::new(&[2,3,3,1], Activation::LRELU, Activation::Sigmoid); // train the network net1.train(&examples) From 1e01e73caa9dd285732ab65182371d89fdba4cb5 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 5 Oct 2017 15:21:55 +0200 Subject: [PATCH 14/23] PELU fix --- src/lib.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f24704e..f6653b3 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -81,8 +81,8 @@ const DEFAULT_EPOCHS:u32 = 1000; const SELU_FACTOR_A:f64 = 1.06071; //greater than 1, lambda in https://arxiv.org/pdf/1706.02515.pdf const SELU_FACTOR_B:f64 = 1.97126; //alpha in https://arxiv.org/pdf/1706.02515.pdf -const PELU_FACTOR_A:f64 = 2.0; -const PELU_FACTOR_B:f64 = 10.0; +const PELU_FACTOR_A:f64 = 1.5; +const PELU_FACTOR_B:f64 = 2.0; const LRELU_FACTOR:f64 = 0.33; @@ -480,7 +480,7 @@ impl NN { let act_deriv = match self.out_act { //output activation 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu - 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu + 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu }; node_error = act_deriv * (targets[node_index] - result); @@ -493,7 +493,7 @@ impl NN { let act_deriv = match self.hid_act { //hidden activation 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu - 2 => if result >= 0.0f64 { SELU_FACTOR_A / SELU_FACTOR_B } else { (result + SELU_FACTOR_A) * SELU_FACTOR_B }, //pelu + 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu }; node_error = act_deriv * sum; From 761c4099866d2605cb945aa6b4b44b205a0d4e08 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Sat, 7 Oct 2017 16:52:54 +0200 Subject: [PATCH 15/23] linear activation, possibly good for output, added --- src/lib.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f6653b3..8d9cfd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -98,6 +98,8 @@ pub enum Activation { PELU, /// Leaky ReLU activation LRELU, + /// Linear activation + Linear, } /// Specifies when to stop training the network @@ -293,12 +295,14 @@ impl NN { Activation::SELU => 1, Activation::PELU => 2, Activation::LRELU => 3, + Activation::Linear => 4, }; let out_act = match output_activation { Activation::Sigmoid => 0, Activation::SELU => 1, Activation::PELU => 2, Activation::LRELU => 3, + Activation::Linear => 4, }; NN { layers: layers, num_inputs: first_layer_size, hid_act: hid_act, out_act: out_act } } @@ -420,7 +424,8 @@ impl NN { 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu - _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + 3 => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + _ => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear } } else @@ -429,7 +434,8 @@ impl NN { 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu - _ => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu + 3 => layer_results.push( lrelu(modified_dotprod(&node, 
&results[layer_index])) ), //lrelu + _ => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear } } } @@ -440,7 +446,7 @@ impl NN { // updates all weights in the network fn update_weights(&mut self, network_weight_updates: &Vec>>, prev_deltas: &mut Vec>>, rate: f64, lambda: f64, momentum: f64) { - for layer_index in 0..self.layers.len() { + for layer_index in 0..self.layers.len() { let mut layer = &mut self.layers[layer_index]; let layer_weight_updates = &network_weight_updates[layer_index]; for node_index in 0..layer.len() { @@ -481,7 +487,8 @@ impl NN { 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu - _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu + 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu + _ => 1.0, //linear }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -494,7 +501,8 @@ impl NN { 0 => result * (1.0 - result), //sigmoid 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu - _ => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu + 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu + _ => 1.0, //linear }; node_error = act_deriv * sum; } @@ -590,6 +598,10 @@ fn lrelu(y: f64) -> f64 { //LRELU activation } } +fn linear(y: f64) -> f64 { //linear activation + y +} + // takes two arrays and enumerates the iterator produced by zipping each of // their iterators together From da8af9dec08d9afcff694c97810d54e2019245b5 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Sun, 8 Oct 2017 02:51:15 +0200 Subject: [PATCH 16/23] fix PELU function --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 8d9cfd5..51e26c7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -579,7 +579,7 @@ fn selu(y: f64) -> f64 { //SELU activation fn pelu(y: f64) -> f64 { //PELU activation if y < 0.0 { - SELU_FACTOR_A * (y / SELU_FACTOR_B).exp() - SELU_FACTOR_A + PELU_FACTOR_A * (y / PELU_FACTOR_B).exp() - PELU_FACTOR_A } else { From c5c211249b6f28f6f8d814ed35599db455dedf4f Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Tue, 10 Oct 2017 14:17:44 +0200 Subject: [PATCH 17/23] Tanh activation added, changed initialization --- src/lib.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 51e26c7..708e540 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -100,6 +100,8 @@ pub enum Activation { LRELU, /// Linear activation Linear, + /// Tanh activation + Tanh, } /// Specifies when to stop training the network @@ -241,7 +243,7 @@ impl NN { /// layer. The first number is the input layer, the last /// number is the output layer, and all numbers between the first and /// last are hidden layers. There must be at least two layers in the network. - /// The activation function can be Sigmoid, SELU, PELU or LRELU. + /// The activation function can be Sigmoid, SELU, PELU, LRELU or Tanh. /// Important: Take care of inputs/outputs for the individual activation functions! 
pub fn new(layers_sizes: &[u32], hidden_activation: Activation, output_activation: Activation) -> NN { let mut rng = rand::thread_rng(); @@ -266,7 +268,9 @@ impl NN { let mut prev_layer_size = first_layer_size; for &layer_size in it { let mut layer: Vec> = Vec::new(); - let normal = Normal::new(0.0, (1.0 / prev_layer_size as f64).sqrt()); //2.0 / prev + let mut init_std_scale = 2.0; //He init + if hidden_activation == Activation::SELU { init_std_scale = 1.0; } //MSRA / Xavier init + let normal = Normal::new(0.0, (init_std_scale / prev_layer_size as f64).sqrt()); for _ in 0..layer_size { let mut node: Vec = Vec::new(); for i in 0..prev_layer_size+1 { @@ -296,6 +300,7 @@ impl NN { Activation::PELU => 2, Activation::LRELU => 3, Activation::Linear => 4, + Activation::Tanh => 5, }; let out_act = match output_activation { Activation::Sigmoid => 0, @@ -303,6 +308,7 @@ impl NN { Activation::PELU => 2, Activation::LRELU => 3, Activation::Linear => 4, + Activation::Tanh => 5, }; NN { layers: layers, num_inputs: first_layer_size, hid_act: hid_act, out_act: out_act } } @@ -425,7 +431,8 @@ impl NN { 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu 3 => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu - _ => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear + 4 => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear + _ => layer_results.push( tanh(modified_dotprod(&node, &results[layer_index])) ), //tanh } } else @@ -435,7 +442,8 @@ impl NN { 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu 3 => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu - _ => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear + 4 => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear + _ => layer_results.push( tanh(modified_dotprod(&node, &results[layer_index])) ), //tanh } } } @@ -488,7 +496,8 @@ impl NN { 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu - _ => 1.0, //linear + 4 => 1.0, //linear + _ => 1.0 - result * result, //tanh }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -502,7 +511,8 @@ impl NN { 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu - _ => 1.0, //linear + 4 => 1.0, //linear + _ => 1.0 - result * result, //tanh }; node_error = act_deriv * sum; } @@ -602,6 +612,10 @@ fn linear(y: f64) -> f64 { //linear activation y } +fn tanh(y: f64) -> f64 { //tanh activation + y.tanh() +} + // takes two arrays and enumerates the iterator produced by zipping each of // their iterators together From 8c5064c028fd257f2b8c824253b56787e4101783 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 8 Nov 2017 13:27:38 +0100 Subject: [PATCH 18/23] cleanup, cosmetic, updated json library --- 
Cargo.toml | 27 ++++++++++++++- src/lib.rs | 97 +++++++++++++++++++++------------------------------- tests/xor.rs | 2 +- 3 files changed, 66 insertions(+), 60 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 178f8aa..d0155be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,29 @@ authors = ["https://github.com/jackm321/RustNN"] [dependencies] rand = "0.3.*" -rustc-serialize = "0.3.*" +serde = "1.*" +serde_derive = "1.*" +serde_json = "1.*" + + + +[profile.dev] +opt-level = 3 +lto = true +panic = "unwind" +debug = true +debug-assertions = true + +[profile.test] +opt-level = 0 +lto = false +panic = "unwind" +debug = true +debug-assertions = true + +[profile.release] +opt-level = 3 +lto = true +panic = "unwind" +debug = false +debug-assertions = false diff --git a/src/lib.rs b/src/lib.rs index 708e540..8c635dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ //! for more details. //! //! ```rust -//! use nn::{NN, HaltCondition}; +//! use nn::{NN, HaltCondition, Activation}; //! //! // create examples of the XOR function //! // the network is trained on tuples of vectors where the first vector @@ -37,7 +37,7 @@ //! // that specifies the number of layers and the number of nodes in each layer //! // in this case we have an input layer with 2 nodes, one hidden layer //! // with 3 nodes and the output layer has 1 node -//! let mut net = NN::new(&[2, 3, 1]); +//! let mut net = NN::new(&[2, 3, 1], Activation::PELU, Activation::Sigmoid); //! //! // train the network on the examples of the XOR function //! // all methods seen here are optional except go() which must be called to begin training @@ -57,16 +57,18 @@ //! } //! ``` +#[macro_use] +extern crate serde_derive; + +extern crate serde; +extern crate serde_json; extern crate rand; -extern crate rustc_serialize; use HaltCondition::{ Epochs, MSE, Timer }; use LearningMode::{ Incremental }; use std::iter::{Zip, Enumerate}; use std::slice; use std::time::{ Duration, Instant }; -use rustc_serialize::json; -//use rand::Rng; use rand::distributions::{Normal, IndependentSample}; const DEFAULT_LEARNING_RATE:f64 = 0.3; @@ -88,7 +90,7 @@ const LRELU_FACTOR:f64 = 0.33; /// Specifies the activation function -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq, Deserialize, Serialize)] pub enum Activation { /// Sigmoid activation Sigmoid, @@ -229,12 +231,12 @@ impl<'a,'b> Trainer<'a,'b> { } /// Neural network -#[derive(Debug, Clone, RustcDecodable, RustcEncodable)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct NN { layers: Vec>>, num_inputs: u32, - hid_act: u32, - out_act: u32, + hid_act: Activation, + out_act: Activation, } impl NN { @@ -243,8 +245,9 @@ impl NN { /// layer. The first number is the input layer, the last /// number is the output layer, and all numbers between the first and /// last are hidden layers. There must be at least two layers in the network. - /// The activation function can be Sigmoid, SELU, PELU, LRELU or Tanh. + /// The activation function can be Sigmoid, SELU, PELU, LRELU, Linear or Tanh. /// Important: Take care of inputs/outputs for the individual activation functions! + /// Do not use linear activation for hidden layers. 
pub fn new(layers_sizes: &[u32], hidden_activation: Activation, output_activation: Activation) -> NN { let mut rng = rand::thread_rng(); @@ -293,24 +296,7 @@ impl NN { } layers.shrink_to_fit(); - //set activation functions - let hid_act = match hidden_activation { - Activation::Sigmoid => 0, - Activation::SELU => 1, - Activation::PELU => 2, - Activation::LRELU => 3, - Activation::Linear => 4, - Activation::Tanh => 5, - }; - let out_act = match output_activation { - Activation::Sigmoid => 0, - Activation::SELU => 1, - Activation::PELU => 2, - Activation::LRELU => 3, - Activation::Linear => 4, - Activation::Tanh => 5, - }; - NN { layers: layers, num_inputs: first_layer_size, hid_act: hid_act, out_act: out_act } + NN { layers: layers, num_inputs: first_layer_size, hid_act: hidden_activation, out_act: output_activation } } /// Runs the network on an input and returns a vector of the results. @@ -343,12 +329,12 @@ impl NN { /// Encodes the network as a JSON string. pub fn to_json(&self) -> String { - json::encode(self).ok().expect("encoding JSON failed") + serde_json::to_string(self).ok().expect("encoding JSON failed") } /// Builds a new network from a JSON string. pub fn from_json(encoded: &str) -> NN { - let network: NN = json::decode(encoded).ok().expect("decoding JSON failed"); + let network:NN = serde_json::from_str(encoded).ok().expect("decoding JSON failed"); network } @@ -424,27 +410,22 @@ impl NN { for (layer_index, layer) in self.layers.iter().enumerate() { let mut layer_results = Vec::new(); for node in layer.iter() { + let activation; if layer_index == self.layers.len()-1 //output layer { - match self.out_act { - 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid - 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu - 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu - 3 => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu - 4 => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear - _ => layer_results.push( tanh(modified_dotprod(&node, &results[layer_index])) ), //tanh - } + activation = self.out_act; } else { - match self.hid_act { - 0 => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), //sigmoid - 1 => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), //selu - 2 => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), //pelu - 3 => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), //lrelu - 4 => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), //linear - _ => layer_results.push( tanh(modified_dotprod(&node, &results[layer_index])) ), //tanh - } + activation = self.hid_act; + } + match activation { + Activation::Sigmoid => layer_results.push( sigmoid(modified_dotprod(&node, &results[layer_index])) ), + Activation::SELU => layer_results.push( selu(modified_dotprod(&node, &results[layer_index])) ), + Activation::PELU => layer_results.push( pelu(modified_dotprod(&node, &results[layer_index])) ), + Activation::LRELU => layer_results.push( lrelu(modified_dotprod(&node, &results[layer_index])) ), + Activation::Linear => layer_results.push( linear(modified_dotprod(&node, &results[layer_index])) ), + Activation::Tanh => layer_results.push( tanh(modified_dotprod(&node, &results[layer_index])) ), } } results.push(layer_results); @@ -492,12 +473,12 @@ impl NN { // calculate 
error for this node if layer_index == layers.len() - 1 { let act_deriv = match self.out_act { //output activation - 0 => result * (1.0 - result), //sigmoid - 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu - 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu - 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu - 4 => 1.0, //linear - _ => 1.0 - result * result, //tanh + Activation::Sigmoid => result * (1.0 - result), + Activation::SELU => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, + Activation::PELU => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, + Activation::LRELU => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, + Activation::Linear => 1.0, + Activation::Tanh => 1.0 - result * result, }; node_error = act_deriv * (targets[node_index] - result); } else { @@ -507,12 +488,12 @@ impl NN { sum += next_node[node_index+1] * next_node_error_data; // +1 because the 0th weight is the threshold } let act_deriv = match self.hid_act { //hidden activation - 0 => result * (1.0 - result), //sigmoid - 1 => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, //selu - 2 => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, //pelu - 3 => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, //lrelu - 4 => 1.0, //linear - _ => 1.0 - result * result, //tanh + Activation::Sigmoid => result * (1.0 - result), + Activation::SELU => if result >= 0.0f64 { SELU_FACTOR_A } else { result + SELU_FACTOR_A * SELU_FACTOR_B }, + Activation::PELU => if result >= 0.0f64 { PELU_FACTOR_A / PELU_FACTOR_B } else { (result + PELU_FACTOR_A) / PELU_FACTOR_B }, + Activation::LRELU => if result >= 0.0f64 { 1.0 } else { LRELU_FACTOR }, + Activation::Linear => 1.0, + Activation::Tanh => 1.0 - result * result, }; node_error = act_deriv * sum; } diff --git a/tests/xor.rs b/tests/xor.rs index 86fbf0f..e6fa5f1 100644 --- a/tests/xor.rs +++ b/tests/xor.rs @@ -13,7 +13,7 @@ fn xor_4layers() { ]; // create a new neural network - let mut net1 = NN::new(&[2,3,3,1], Activation::LRELU, Activation::Sigmoid); + let mut net1 = NN::new(&[2,3,3,1], Activation::PELU, Activation::Sigmoid); // train the network net1.train(&examples) From ded6672e142645f56ece9fe16cd5c9c962747a45 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 8 Nov 2017 13:30:59 +0100 Subject: [PATCH 19/23] fit readme to NN changes --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4fe1694..395e7c3 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ given examples. See the documentation for the `NN` and `Trainer` structs for more details. 
```rust -use nn::{NN, HaltCondition}; +use nn::{NN, HaltCondition, Activation}; // create examples of the XOR function // the network is trained on tuples of vectors where the first vector @@ -43,7 +43,7 @@ let examples = [ // that specifies the number of layers and the number of nodes in each layer // in this case we have an input layer with 2 nodes, one hidden layer // with 3 nodes and the output layer has 1 node -let mut net = NN::new(&[2, 3, 1]); +let mut net = NN::new(&[2, 3, 1], Activation::PELU, Activation::Sigmoid); // train the network on the examples of the XOR function // all methods seen here are optional except go() which must be called to begin training From 78d0eacfc00580081c4da88fa9a54cd9c05b6885 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 8 Nov 2017 13:43:18 +0100 Subject: [PATCH 20/23] readd trayis --- travis.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 travis.yml diff --git a/travis.yml b/travis.yml new file mode 100644 index 0000000..613564f --- /dev/null +++ b/travis.yml @@ -0,0 +1 @@ +language: rust \ No newline at end of file From e311f1d8f149c2fd13b2765b826aa0a170c899e1 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Wed, 8 Nov 2017 13:50:17 +0100 Subject: [PATCH 21/23] trayis rename --- travis.yml => .travis.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename travis.yml => .travis.yml (100%) diff --git a/travis.yml b/.travis.yml similarity index 100% rename from travis.yml rename to .travis.yml From db7d95eefc4fa149f7adbd92e6ca2575c0788c99 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 21 Dec 2017 16:19:09 +0100 Subject: [PATCH 22/23] remove warnings, that came up after rustc update --- src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8c635dd..4da4578 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,11 +64,11 @@ extern crate serde; extern crate serde_json; extern crate rand; -use HaltCondition::{ Epochs, MSE, Timer }; -use LearningMode::{ Incremental }; +use HaltCondition::{Epochs, MSE, Timer}; +use LearningMode::{Incremental}; use std::iter::{Zip, Enumerate}; use std::slice; -use std::time::{ Duration, Instant }; +use std::time::{Duration, Instant}; use rand::distributions::{Normal, IndependentSample}; const DEFAULT_LEARNING_RATE:f64 = 0.3; @@ -436,10 +436,10 @@ impl NN { // updates all weights in the network fn update_weights(&mut self, network_weight_updates: &Vec>>, prev_deltas: &mut Vec>>, rate: f64, lambda: f64, momentum: f64) { for layer_index in 0..self.layers.len() { - let mut layer = &mut self.layers[layer_index]; + let layer = &mut self.layers[layer_index]; let layer_weight_updates = &network_weight_updates[layer_index]; for node_index in 0..layer.len() { - let mut node = &mut layer[node_index]; + let node = &mut layer[node_index]; let node_weight_updates = &layer_weight_updates[node_index]; for weight_index in 0..node.len() { let weight_update = node_weight_updates[weight_index]; From 4af774e92e2d357cdfd400fa620e4a69a3ab8f98 Mon Sep 17 00:00:00 2001 From: FlixCoder Date: Thu, 21 Dec 2017 16:19:23 +0100 Subject: [PATCH 23/23] add example --- examples/selector.rs | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 examples/selector.rs diff --git a/examples/selector.rs b/examples/selector.rs new file mode 100644 index 0000000..d1f8289 --- /dev/null +++ b/examples/selector.rs @@ -0,0 +1,43 @@ +extern crate nn; + +use nn::{NN, HaltCondition, Activation}; + +const ACTIONS:u32 = 10; + + +fn main() +{ + // 
create examples of the selector function: input i maps to a one-hot vector with a 1.0 at index i
+	let mut examples = Vec::new();
+	for i in 0..ACTIONS
+	{
+		let mut result = Vec::new();
+		for j in 0..ACTIONS
+		{
+			if j == i { result.push(1.0); }
+			else { result.push(0.0); }
+		}
+		let example = (vec![i as f64], result);
+		examples.push(example);
+	}
+
+	// create a new neural network
+	let mut nn = NN::new(&[1, 10, ACTIONS], Activation::PELU, Activation::Sigmoid);
+
+	// train the network
+	nn.train(&examples)
+		.log_interval(Some(1000))
+		.halt_condition( HaltCondition::MSE(0.01) )
+		.rate(0.025)
+		.momentum(0.5)
+		.lambda(0.00005)
+		.go();
+
+	// print results of the trained network
+	for &(ref input, _) in examples.iter()
+	{
+		let result = nn.run(input);
+		let print:Vec<String> = result.iter().map(|x:&f64| { format!("{:4.2}", (*x * 100.0).round() / 100.0) }).collect();
+		println!("{:1.0} -> {:?}", input[0], print);
+	}
+}
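
The selector example exercises the new two-activation constructor, the `lambda` trainer option, and the PELU/Sigmoid split between hidden and output layers. One piece of the forked API it does not touch is JSON serialization, which patch 18 moved from rustc-serialize to serde. The sketch below is not part of the patch series; it is a minimal usage illustration with untuned, illustrative hyperparameter values. It trains the XOR network from the README and round-trips it through `to_json`/`from_json` before running the restored copy:

```rust
extern crate nn;

use nn::{NN, HaltCondition, Activation};

fn main() {
    // XOR training data, as in the README example
    let examples = [
        (vec![0f64, 0f64], vec![0f64]),
        (vec![0f64, 1f64], vec![1f64]),
        (vec![1f64, 0f64], vec![1f64]),
        (vec![1f64, 1f64], vec![0f64]),
    ];

    // hidden layer uses PELU, output layer uses sigmoid since the targets lie in [0, 1]
    let mut net = NN::new(&[2, 3, 1], Activation::PELU, Activation::Sigmoid);

    // lambda is the L2 regularization factor introduced by this fork; 0.0 disables it
    net.train(&examples)
        .halt_condition(HaltCondition::Epochs(10000))
        .rate(0.1)
        .momentum(0.5)
        .lambda(0.00005)
        .go();

    // serialize the trained network to JSON and rebuild it from the string
    let encoded = net.to_json();
    let restored = NN::from_json(&encoded);

    // the restored copy produces the same outputs as the original network
    for &(ref input, ref target) in examples.iter() {
        let output = restored.run(input);
        println!("{:?} -> {:?} (expected {:?})", input, output, target);
    }
}
```

Because the `Activation` values are plain enum fields of `NN` after patch 18, they are serialized together with the weights, so `from_json` restores the activation functions as well and the loaded network needs no further configuration.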