forked from btgraham/SparseConvNet-archived
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathNetworkInNetworkLayer.h
More file actions
68 lines (66 loc) · 2.35 KB
/
NetworkInNetworkLayer.h
File metadata and controls
68 lines (66 loc) · 2.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#pragma once
#include <fstream>
#include "SpatiallySparseLayer.h"
#include "Rng.h"
// Fully-connected ("network in network" / 1x1 convolution) layer for a
// spatially sparse CNN. Holds full weight/bias tensors together with momentum
// buffers for SGD, plus "shrunk" copies restricted to the feature subsets kept
// by dropout (see the dShrink* kernels below). Implementation is in the
// corresponding .cu file; only the interface is declared here.
class NetworkInNetworkLayer : public SpatiallySparseLayer {
private:
RNG rng; // layer-local RNG -- presumably used to sample dropout feature subsets; confirm in the .cu implementation
public:
vectorCUDA<float> W; //Weights (full matrix, nFeaturesIn x nFeaturesOut -- layout defined in the .cu file)
vectorCUDA<float> MW; //momentum buffer paired with W
vectorCUDA<float> w; //shrunk copy of W, restricted to features kept by dropout
vectorCUDA<float> dw; //gradient w.r.t. w, for backprop
vectorCUDA<float> B; //Bias vector (original comment said "Weights" -- apparent copy-paste error; name and dShrinkVectorForDropout usage indicate biases)
vectorCUDA<float> MB; //momentum buffer paired with B
vectorCUDA<float> b; //shrunk copy of B, restricted to features kept by dropout
vectorCUDA<float> db; //gradient w.r.t. b, for backprop
ActivationFunction fn; // nonlinearity applied after the affine transform
int nFeaturesIn; // number of input feature channels
int nFeaturesOut; // number of output feature channels
float dropout; // dropout rate (0 = no dropout)
NetworkInNetworkLayer(int nFeaturesIn, int nFeaturesOut,
float dropout=0,ActivationFunction fn=NOSIGMOID,
float alpha=1//used to determine initialization weights only
);
// Prepare per-batch state before the forward pass.
void preprocess
(SpatiallySparseBatch &batch,
SpatiallySparseBatchInterface &input,
SpatiallySparseBatchInterface &output);
// Forward pass: input -> output through this layer.
void forwards
(SpatiallySparseBatch &batch,
SpatiallySparseBatchInterface &input,
SpatiallySparseBatchInterface &output);
// Rescale weights (used for weight normalization across layers; exact
// semantics of scalingUnderneath/topLayer are defined in the .cu file).
void scaleWeights
(SpatiallySparseBatchInterface &input,
SpatiallySparseBatchInterface &output,
float& scalingUnderneath,
bool topLayer);
// Backward pass: propagate gradients and apply an SGD-with-momentum update.
void backwards
(SpatiallySparseBatch &batch,
SpatiallySparseBatchInterface &input,
SpatiallySparseBatchInterface &output,
float learningRate,
float momentum);
// Serialization of W and B (format defined by the .cu implementation).
void loadWeightsFromStream(std::ifstream &f);
void putWeightsToStream(std::ofstream &f);
// A 1x1 layer does not change spatial extent, so this presumably returns
// outputSpatialSize unchanged -- confirm in the .cu file.
int calculateInputSpatialSize(int outputSpatialSize);
};
// CUDA kernel declarations shared with the .cu implementation. The dShrink*
// kernels gather the rows/columns listed in {in,out}FeaturesPresent from the
// full parameter tensors into the smaller dropout-restricted copies; the
// dGradientDescent* kernels apply SGD-with-momentum updates, either to the
// full tensors or (Shrunk* variants) scattered back through the same index
// lists.

// Gather selected rows/columns of matrix m into the shrunk matrix md.
__global__ void dShrinkMatrixForDropout
(float* m, float* md,
int* inFeaturesPresent, int* outFeaturesPresent,
int nOut, int nOutDropout);
// Gather selected entries of vector m into the shrunk vector md.
__global__ void dShrinkVectorForDropout
(float* m, float* md, int* outFeaturesPresent, int nOut, int nOutDropout);
// SGD-with-momentum update over the full (un-shrunk) parameters.
__global__ void dGradientDescent
(float* d_delta, float* d_momentum, float* d_weights, int nOut, float learningRate, float momentum);
// SGD update for a shrunk matrix, scattered back into the full matrix via
// the feature-index lists.
__global__ void dGradientDescentShrunkMatrix
(float* d_delta, float* d_momentum, float* d_weights,
int nOut, int nOutDropout,
int* inFeaturesPresent, int* outFeaturesPresent,
float learningRate,float momentum);
// SGD update for a shrunk vector, scattered back into the full vector.
__global__ void dGradientDescentShrunkVector
(float* d_delta, float* d_momentum, float* d_weights,
int nOut, int nOutDropout,
int* outFeaturesPresent,
float learningRate,float momentum);
// Sum each column of matrix into target -- name suggests a column reduction
// (nRows x nColumns -> nColumns); confirm layout in the .cu file.
void columnSum(float* matrix, float* target, int nRows, int nColumns);