From e51471350c68ea8bf82efc73e70323562334b791 Mon Sep 17 00:00:00 2001 From: botforge Date: Fri, 20 Sep 2019 07:39:06 -0700 Subject: [PATCH 01/48] Affine FwdandBack done --- .../character_recognition/CMakeLists.txt | 2 +- .../character_recognition/mlp.cu | 88 ++++++++++++++++--- .../character_recognition/mlp.h | 22 +++++ Project2-Character-Recognition/src/main.cpp | 16 +++- 4 files changed, 110 insertions(+), 18 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/CMakeLists.txt b/Project2-Character-Recognition/character_recognition/CMakeLists.txt index 7446175..c5e28b0 100644 --- a/Project2-Character-Recognition/character_recognition/CMakeLists.txt +++ b/Project2-Character-Recognition/character_recognition/CMakeLists.txt @@ -7,5 +7,5 @@ set(SOURCE_FILES cuda_add_library(character_recognition ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_75 ) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 5a3ed7f..d865a60 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -2,7 +2,9 @@ #include #include "common.h" #include "mlp.h" +#include +#define blockSize 512 namespace CharacterRecognition { using Common::PerformanceTimer; PerformanceTimer& timer() @@ -10,18 +12,78 @@ namespace CharacterRecognition { static PerformanceTimer timer; return timer; } - - // TODO: __global__ - - /** - * Example of use case (follow how you did it in stream compaction) - */ - /*void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO - timer().endGpuTimer(); - } - */ + __global__ void kernInitWeightsBias(float *W, float *b, int inputDim, int num_samples, int outputDim) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= inputDim * outputDim) { + return; + } + thrust::default_random_engine rng; + thrust::uniform_real_distribution 
dist(0.0, 1.0); + W[index] = dist(rng); + int y = index / outputDim; + b[y] = 0; + } + + __global__ void kernAffineForward(float *W, float *b, float *in, float *out, int inputDim, int outputDim, int num_samples, bool sigmoid) { + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + float val = 0; + + if (row < num_samples && col < inputDim) { + for (int i = 0; i < inputDim; i++) { + val += W[row * inputDim + i] * in[i * inputDim + col]; + } + val += b[row]; + } + out[row * outputDim + col] = sigmoid ? val : 1/(1+__expf(-val)); + } + + __global__ void kernAffineBackward() { + int row = blockIdx.y * blockDim.y + threadIdx.y; + int col = blockIdx.x * blockDim.x + threadIdx.x; + float val = 0; + + if (row < num_samples && col < inputDim) { + for (int i = 0; i < inputDim; i++) { + val += W[row * inputDim + i] * in[i * inputDim + col]; + } + val += b[row]; + } + out[row * outputDim + col] = sigmoid ? val : 1/(1+__expf(-val)); + } + + //AffineLayer + AffineLayer::AffineLayer(int idim, int odim) : inputDim(idim), outputDim(odim), softmax(true), eval(false) { + //Malloc Weights & Biases + cudaMalloc(&W, idim * odim * sizeof(float)); + checkCUDAError("cuda Malloc W failed"); + cudaMalloc(&b, odim * sizeof(float)); + checkCUDAError("cuda Malloc b failed"); + + //Call Initializer Kernels + dim3 fullBlocksPerGrid((inputDim * outputDim - 1) / blockSize); + kernInitWeightsBias<<>>(W, b, inputDim, outputDim); + } + + void AffineLayer::forward(float *in, float *out, int num_samples) { + /*Uses W & b to perform forward pass on an Affine Layer (Assumes dimensions are correct or things will go very wrong) + in: Input array of shape inputDim * num_samples + out: Output array of shape outputDim * num_samples (to be filled in) + */ + //Malloc the input matrix and an output matrix (should I even do this? Memcpy?) 
+ cudaMalloc(&in, inputDim * num_samples * sizeof(float)); + checkCUDAError("cuda Malloc in failed"); + cudaMalloc(&out, outputDim * num_samples * sizeof(float)); + checkCUDAError("cuda Malloc in failed"); + + //Call Affine Forward Kernel + dim3 affine_blocksize(8, 8); + dim3 numBlocks((outputDim + affine_blocksize.x - 1) / affine_blocksize.x, (num_samples + affine_blocksize.y - 1) / affine_blocksize.y); + kernAffineForward<<>>(W, b, in, out, inputDim, outputDim, num_samples, sigmoid); + - // TODO: implement required elements for MLP sections 1 and 2 here + //delete + cudaFree(&out); + cudaFree(&in); + } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index 2096228..9a5350f 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -1,9 +1,31 @@ #pragma once +#include #include "common.h" namespace CharacterRecognition { Common::PerformanceTimer& timer(); + class AffineLayer{ + float *W; + float *b; + int inputDim, outputDim; + bool sigmoid; + bool eval; + public: + AffineLayer(int idim, int odim); + void forward(float *in, float *out, int num_samples); + void backward(float *dout, float *dw, float *dx, float *db); + void setEval(bool state); + void setSigmoid(bool state); + char* getType(); + }; + + class FCN { + std::vector layers; + public: + FCN(int inputDim, int outputDim, int numHiddenLayers, int *hiddenDims); + void forward(float *input, float *ouput, bool eval); + }; // TODO: implement required elements for MLP sections 1 and 2 here } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 11dd534..175f63d 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -18,8 +18,15 @@ int *b = new int[SIZE]; int *c = new int[SIZE]; int main(int argc, char* argv[]) { - // Scan tests + //Character 
Recognition Tests + printf("\n"); + printf("****************\n"); + printf("** SCAN TESTS **\n"); + printf("****************\n"); + + // Scan tests + /* printf("\n"); printf("****************\n"); printf("** SCAN TESTS **\n"); @@ -37,6 +44,7 @@ int main(int argc, char* argv[]) { StreamCompaction::CPU::scan(SIZE, b, a); printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); printArray(SIZE, b, true); + zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); @@ -51,13 +59,13 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); //printArray(SIZE, c, true); printCmpResult(SIZE, b, c); - + */ /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan onesArray(SIZE, c); printDesc("1s array for finding bugs"); StreamCompaction::Naive::scan(SIZE, c, a); printArray(SIZE, c, true); */ - + /* zeroArray(SIZE, c); printDesc("naive scan, non-power-of-two"); StreamCompaction::Naive::scan(NPOT, c, a); @@ -144,7 +152,7 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); //printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - + */ system("pause"); // stop Win32 console from closing on exit delete[] a; delete[] b; From d05672b0008e4fb62c0c06cd8a4275d911b2c5b0 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 05:32:10 -0700 Subject: [PATCH 02/48] Almost done with backprop --- .../character_recognition/mlp.cu | 182 ++++++++++++++---- .../character_recognition/mlp.h | 11 +- Project2-Character-Recognition/src/main.cpp | 27 ++- 3 files changed, 179 insertions(+), 41 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index d865a60..0fa14e5 100644 --- 
a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -1,5 +1,5 @@ #include -#include +#include Shape #include "common.h" #include "mlp.h" #include @@ -12,78 +12,188 @@ namespace CharacterRecognition { static PerformanceTimer timer; return timer; } - __global__ void kernInitWeightsBias(float *W, float *b, int inputDim, int num_samples, int outputDim) { + __host__ __device__ unsigned int hash(unsigned int a) { + a = (a + 0x7ed55d16) + (a << 12); + a = (a ^ 0xc761c23c) ^ (a >> 19); + a = (a + 0x165667b1) + (a << 5); + a = (a + 0xd3a2646c) ^ (a << 9); + a = (a + 0xfd7046c5) + (a << 3); + a = (a ^ 0xb55a4f09) ^ (a >> 16); + return a; + } + __global__ void kernInitWeightsBias(float *W, float *b, int inputDim, int outputDim){ + //Random Weight Initialization & Zero Bias Initialization int index = (blockIdx.x * blockDim.x) + threadIdx.x; if (index >= inputDim * outputDim) { return; } - thrust::default_random_engine rng; + thrust::default_random_engine rng(hash((int)(index * inputDim * outputDim))); thrust::uniform_real_distribution dist(0.0, 1.0); W[index] = dist(rng); int y = index / outputDim; b[y] = 0; } - __global__ void kernAffineForward(float *W, float *b, float *in, float *out, int inputDim, int outputDim, int num_samples, bool sigmoid) { - int row = blockIdx.y * blockDim.y + threadIdx.y; - int col = blockIdx.x * blockDim.x + threadIdx.x; + __global__ void kernAffineForward(float *W, float *b, float *in, float *out, int inputDim, int outputDim, int numSamples, bool sigmoid) { + /* + W: Shape inputDim x outputDim + b: Shape outputDim + in: Shape numSamples x inputDim + out: Shape numSamples x outputDim + */ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int row = index / outputDim; + int col = index % outputDim; float val = 0; - - if (row < num_samples && col < inputDim) { + if (row < numSamples && col < outputDim) { for (int i = 0; i < inputDim; i++) { - val += W[row * inputDim + 
i] * in[i * inputDim + col]; + val += in[row * inputDim + i] * W[i * outputDim + col]; } val += b[row]; } - out[row * outputDim + col] = sigmoid ? val : 1/(1+__expf(-val)); + out[row * outputDim + col] = sigmoid ? 1/(1+__expf(-val)) : val; + } + + __device__ float applySigmoid(float x) { + return 1 / (1 + __expf(-x)); + } + + __device__ float dSigmoid(float x) { + return x * (1 - x); + } + + __global__ void kern_dSoftmax(float *dout, float *doutLinear, int numSamples, int outputDim) { + //Apply softmax across entire dout matrix (dout is outputDim x + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= numSamples * outputDim) { + return; + } + float doutidx = dout[index]; + doutLinear[index] = doutidx * (1 - doutidx); } - __global__ void kernAffineBackward() { - int row = blockIdx.y * blockDim.y + threadIdx.y; - int col = blockIdx.x * blockDim.x + threadIdx.x; + __global__ void kern_dIn(float *doutLinear, float *W, float *din, int inputDim, int outputDim, int numSamples) { + /* Effectively calculates matmul(doutLinear, W.T) + doutLinear: outputDim x numSamples - each element is dL/dY where Y = XW + b + W: inputDim x outputDim + din: inputDim x numSamples - each element is dL/din_(i,j) + */ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int row = index / inputDim; + int col = index % inputDim; float val = 0; + if (row < numSamples && col < inputDim) { + for (int i = 0; i < outputDim; i++) { + val += doutLinear[row * outputDim + i] * W[col * outputDim + i]; + } + } + din[row * inputDim + col] = val; + } - if (row < num_samples && col < inputDim) { - for (int i = 0; i < inputDim; i++) { - val += W[row * inputDim + i] * in[i * inputDim + col]; + __global__ void kern_dW(float *W, float *b, float *doutLinear, float *in, int inputDim, int outputDim, int numSamples, float lr) { + /* Effectively calculates matmul(input.T, doutLinear) and applies an update + W: inputDim x outputDim (We do gradient descent here) + b: outputDim (we do gradient decent 
here too) + doutLinear: outputDim x numSamples - each element is dL/dY where Y = XW + b + in: inputDim x numSamples + lr: learning rate + */ + int index = blockIdx.x * blockDim.x + threadIdx.x; + int row = index / outputDim; + int col = index % outputDim; + float val = 0; + float dbval = 0; + float currW = W[row * outputDim + col]; + float currb = b[col]; + float doutLinearIdx = 0; + if (row < inputDim && col < outputDim) { + for (int i = 0; i < numSamples; i++) { + doutLinearIdx = doutLinear[i * outputDim + col]; + val += in[row * inputDim + i] * doutLinearIdx; + dbval += doutLinearIdx; } - val += b[row]; } - out[row * outputDim + col] = sigmoid ? val : 1/(1+__expf(-val)); + W[row * outputDim + col] = currW - lr * (val); + b[col] = currb - lr * (dbval); } //AffineLayer - AffineLayer::AffineLayer(int idim, int odim) : inputDim(idim), outputDim(odim), softmax(true), eval(false) { - //Malloc Weights & Biases + AffineLayer::AffineLayer(int idim, int odim) : numSamples(0), inputDim(idim), outputDim(odim), sigmoid(true), eval(false), doneFwd(false){ + //Malloc Weights, Biases cudaMalloc(&W, idim * odim * sizeof(float)); checkCUDAError("cuda Malloc W failed"); cudaMalloc(&b, odim * sizeof(float)); checkCUDAError("cuda Malloc b failed"); //Call Initializer Kernels - dim3 fullBlocksPerGrid((inputDim * outputDim - 1) / blockSize); + dim3 fullBlocksPerGrid((inputDim * outputDim + blockSize - 1) / blockSize); kernInitWeightsBias<<>>(W, b, inputDim, outputDim); } + void AffineLayer::setSigmoid(bool state) { + sigmoid = state; + } + void AffineLayer::setEval(bool state) { + eval = state; + } - void AffineLayer::forward(float *in, float *out, int num_samples) { - /*Uses W & b to perform forward pass on an Affine Layer (Assumes dimensions are correct or things will go very wrong) - in: Input array of shape inputDim * num_samples - out: Output array of shape outputDim * num_samples (to be filled in) + float* AffineLayer::forward(float *in, int ns) { + /*Uses W & b to perform 
forward pass on an Affine Layer + Assumes dev_input is set (on GPU), numSamples is set and eval is set */ - //Malloc the input matrix and an output matrix (should I even do this? Memcpy?) - cudaMalloc(&in, inputDim * num_samples * sizeof(float)); - checkCUDAError("cuda Malloc in failed"); - cudaMalloc(&out, outputDim * num_samples * sizeof(float)); - checkCUDAError("cuda Malloc in failed"); + //Malloc the input matrix and an output matrix + numSamples = ns; + cudaMalloc((void**)&dev_in, inputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_in in failed"); + cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_out in failed"); + + //Memcpy the *in information into dev_in + cudaMemcpy(dev_in, in, inputDim * numSamples * sizeof(float), cudaMemcpyHostToDevice); //Call Affine Forward Kernel - dim3 affine_blocksize(8, 8); - dim3 numBlocks((outputDim + affine_blocksize.x - 1) / affine_blocksize.x, (num_samples + affine_blocksize.y - 1) / affine_blocksize.y); - kernAffineForward<<>>(W, b, in, out, inputDim, outputDim, num_samples, sigmoid); + int numBlocks = (numSamples * outputDim + blockSize - 1) / blockSize; + kernAffineForward<<>>(W, b, dev_in, dev_out, inputDim, outputDim, numSamples, sigmoid); + + //Memcpy out the *out and *in information from dev_out + float *out = new float[outputDim * numSamples]; + + //free (dont free dev_in because you'll need it for backprop) + cudaFree(&dev_out); + return out; + } + + float* AffineLayer::backward(float *dout, float lr){ + /* Does backprop and one gradient update for W & b & returns din + dout: upstream gradient coming in + lr: learning rate + Returns + */ + //Malloc the input matrix and an output matrix + cudaMalloc((void**)&dev_dout, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_dout in failed"); + cudaMalloc((void**)&dev_din, inputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_din in failed"); + 
cudaMalloc((void**)&dev_doutLinear, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_din in failed"); + + //Memcpy the *dout information into dev_dout + cudaMemcpy(dev_dout, dout, outputDim * numSamples * sizeof(float), cudaMemcpyHostToDevice); + checkCUDAError("cuda Memcpy dout in failed"); + + //Make 3 diff grid layouts + dim3 weightBiasGrid((inputDim * outputDim + blockSize - 1) / blockSize); + dim3 outputGrid = (numSamples * outputDim + blockSize - 1) / blockSize; + dim3 inputGrid = ((numSamples * inputDim + blockSize - 1) / blockSize); + + //Get derivative of softmax, and update + kern_dSoftmax <<>>(dev_dout, dev_doutLinear, inputDim, outputDim); + cudaFree(&dev_dout); + //Use transposed matrix to compute dIn + kern_dIn << > > (dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); - //delete - cudaFree(&out); - cudaFree(&in); + //Update dw + kern_dW<<>> + return NULL; } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index 9a5350f..be0f7b4 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -8,13 +8,20 @@ namespace CharacterRecognition { class AffineLayer{ float *W; float *b; + float *dev_in; + float *dev_out; + float *dev_dout; + float *dev_doutLinear; + float *dev_din; + int numSamples; int inputDim, outputDim; bool sigmoid; bool eval; + bool doneFwd; public: AffineLayer(int idim, int odim); - void forward(float *in, float *out, int num_samples); - void backward(float *dout, float *dw, float *dx, float *db); + float* forward(float *in, int num_samples); + float* backward(float *dout, float lr); void setEval(bool state); void setSigmoid(bool state); char* getType(); diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 175f63d..85fb538 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ 
b/Project2-Character-Recognition/src/main.cpp @@ -11,19 +11,40 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 3; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; int *c = new int[SIZE]; +void printFloatArray(float *x, int n) { + printf(" [ "); + for (int i = 0; i < n; i++) { + printf("%f ", x[i]); + } + printf("]\n"); +} + int main(int argc, char* argv[]) { //Character Recognition Tests printf("\n"); printf("****************\n"); - printf("** SCAN TESTS **\n"); + printf("** CHARACTER RECOGNITION TESTS **\n"); printf("****************\n"); - + //Input Array + float *x = new float[SIZE]; + for (int i = 0; i < SIZE; ++i) { + x[i] = (float)i; + } + printFloatArray(x, SIZE); + //Build Layers + float *out; + CharacterRecognition::AffineLayer layer1(SIZE, 4); + layer1.setSigmoid(false); + out = layer1.forward(x, 1); + + //print out float array + printFloatArray(out, 4); // Scan tests /* From fa5bc2c982f84558c886bf1060521483b2ccc925 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 05:38:25 -0700 Subject: [PATCH 03/48] done with backprop --- .../character_recognition/mlp.cu | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 0fa14e5..0ab4e10 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -192,8 +192,13 @@ namespace CharacterRecognition { //Use transposed matrix to compute dIn kern_dIn << > > (dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); - //Update dw - kern_dW<<>> - return NULL; + //Update dw and db + kern_dW << > > (W, b, dev_doutLinear, dev_in, inputDim, outputDim, numSamples, lr); + + //Memcpy back the din info + float *din = 
new float[outputDim * numSamples]; + cudaMemcpy(dev_din, din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); + checkCUDAError("cuda Memcpy din in failed"); + return din; } } From f8b153a7a3880401c8003bcbc9b5d87e5650fb12 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 07:20:28 -0700 Subject: [PATCH 04/48] foward prop definetly works --- .../character_recognition/mlp.cu | 21 ++- Project2-Character-Recognition/src/main.cpp | 159 +++--------------- 2 files changed, 36 insertions(+), 144 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 0ab4e10..99562e4 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -29,7 +29,8 @@ namespace CharacterRecognition { } thrust::default_random_engine rng(hash((int)(index * inputDim * outputDim))); thrust::uniform_real_distribution dist(0.0, 1.0); - W[index] = dist(rng); + //W[index] = dist(rng); + W[index] = 0.1 * index; int y = index / outputDim; b[y] = 0; } @@ -62,7 +63,7 @@ namespace CharacterRecognition { return x * (1 - x); } - __global__ void kern_dSoftmax(float *dout, float *doutLinear, int numSamples, int outputDim) { + __global__ void kern_dSigmoid(float *dout, float *doutLinear, int numSamples, int outputDim) { //Apply softmax across entire dout matrix (dout is outputDim x int index = (blockIdx.x * blockDim.x) + threadIdx.x; if (index >= numSamples * outputDim) { @@ -156,6 +157,7 @@ namespace CharacterRecognition { //Memcpy out the *out and *in information from dev_out float *out = new float[outputDim * numSamples]; + cudaMemcpy(out, dev_out, outputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); //free (dont free dev_in because you'll need it for backprop) cudaFree(&dev_out); @@ -185,20 +187,23 @@ namespace CharacterRecognition { dim3 outputGrid = (numSamples * outputDim + blockSize - 1) / 
blockSize; dim3 inputGrid = ((numSamples * inputDim + blockSize - 1) / blockSize); - //Get derivative of softmax, and update - kern_dSoftmax <<>>(dev_dout, dev_doutLinear, inputDim, outputDim); - cudaFree(&dev_dout); + if (sigmoid) { + //Get derivative of softmax, and update + kern_dSigmoid<<>>(dev_dout, dev_doutLinear, inputDim, outputDim); + cudaFree(&dev_dout); + } - //Use transposed matrix to compute dIn - kern_dIn << > > (dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); + //Use matrix to compute dIn + kern_dIn<<>>(dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); //Update dw and db - kern_dW << > > (W, b, dev_doutLinear, dev_in, inputDim, outputDim, numSamples, lr); + kern_dW<<>>(W, b, dev_doutLinear, dev_in, inputDim, outputDim, numSamples, lr); //Memcpy back the din info float *din = new float[outputDim * numSamples]; cudaMemcpy(dev_din, din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); checkCUDAError("cuda Memcpy din in failed"); + cudaFree(dev_din); return din; } } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 85fb538..2db8e36 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -26,154 +26,41 @@ void printFloatArray(float *x, int n) { } int main(int argc, char* argv[]) { - //Character Recognition Tests + /* + CHARACTER RECOGNITION TESTS + */ printf("\n"); printf("****************\n"); printf("** CHARACTER RECOGNITION TESTS **\n"); printf("****************\n"); - //Input Array - float *x = new float[SIZE]; - for (int i = 0; i < SIZE; ++i) { - x[i] = (float)i; + + //XOR Input Array + int numSamples = 4; + int inputDim = 2; + int outputDim = 3; + float *x = new float[numSamples * inputDim]; + for (int i = 0; i < SIZE; ++i) { + if (i % 2 == 0) { + x[i] = 1; + } + else { + x[i] = 0; + } } - printFloatArray(x, SIZE); + printFloatArray(x, numSamples * inputDim); + //Build Layers float *out; - 
CharacterRecognition::AffineLayer layer1(SIZE, 4); + CharacterRecognition::AffineLayer layer1(inputDim, outputDim); layer1.setSigmoid(false); - out = layer1.forward(x, 1); - - //print out float array - printFloatArray(out, 4); - - // Scan tests - /* - printf("\n"); - printf("****************\n"); - printf("** SCAN TESTS **\n"); - printf("****************\n"); - - genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - // initialize b using StreamCompaction::CPU::scan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::scan is correct. - // At first all cases passed because b && c are all zeroes. - zeroArray(SIZE, b); - printDesc("cpu scan, power-of-two"); - StreamCompaction::CPU::scan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(SIZE, b, true); - + /* FORWARD PROP */ + out = layer1.forward(x, numSamples); + printFloatArray(out, numSamples * outputDim); - zeroArray(SIZE, c); - printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(NPOT, b, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("naive scan, power-of-two"); - StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - */ - /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan - onesArray(SIZE, c); - printDesc("1s array for finding bugs"); - StreamCompaction::Naive::scan(SIZE, c, a); - printArray(SIZE, c, true); */ - /* - zeroArray(SIZE, c); - printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); - 
printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); + /* BACKWARD PROP */ - zeroArray(SIZE, c); - printDesc("work-efficient scan, power-of-two"); - StreamCompaction::Efficient::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - zeroArray(SIZE, c); - printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, power-of-two"); - StreamCompaction::Thrust::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); - printCmpResult(SIZE, b, c); - - zeroArray(SIZE, c); - printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); - - printf("\n"); - printf("*****************************\n"); - printf("** STREAM COMPACTION TESTS **\n"); - printf("*****************************\n"); - - // Compaction tests - - genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; - printArray(SIZE, a, true); - - int count, expectedCount, expectedNPOT; - - // initialize b using StreamCompaction::CPU::compactWithoutScan you implement - // We use b for further comparison. Make sure your StreamCompaction::CPU::compactWithoutScan is correct. 
- zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); - - zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - - zeroArray(SIZE, c); - printDesc("cpu compact with scan"); - count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); - printElapsedTime(StreamCompaction::CPU::timer().getCpuElapsedTimeForPreviousOperation(), "(std::chrono Measured)"); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, power-of-two"); - count = StreamCompaction::Efficient::compact(SIZE, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); - - zeroArray(SIZE, c); - printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); - printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); - */ system("pause"); // stop Win32 console from closing on exit delete[] a; delete[] b; From c1b740d23517fafd134dea1168ca57ec99770c62 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 10:22:01 -0700 Subject: [PATCH 05/48] this finally works double double --- 
.../character_recognition/mlp.cu | 102 +++++++++++++++--- .../character_recognition/mlp.h | 10 +- Project2-Character-Recognition/src/main.cpp | 27 +---- 3 files changed, 95 insertions(+), 44 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 99562e4..a422571 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -1,5 +1,5 @@ #include -#include Shape +#include #include "common.h" #include "mlp.h" #include @@ -51,8 +51,8 @@ namespace CharacterRecognition { val += in[row * inputDim + i] * W[i * outputDim + col]; } val += b[row]; + out[row * outputDim + col] = sigmoid ? 1/(1+__expf(-val)) : val; } - out[row * outputDim + col] = sigmoid ? 1/(1+__expf(-val)) : val; } __device__ float applySigmoid(float x) { @@ -119,17 +119,37 @@ namespace CharacterRecognition { } //AffineLayer - AffineLayer::AffineLayer(int idim, int odim) : numSamples(0), inputDim(idim), outputDim(odim), sigmoid(true), eval(false), doneFwd(false){ - //Malloc Weights, Biases - cudaMalloc(&W, idim * odim * sizeof(float)); + AffineLayer::AffineLayer(int idim, int odim, int ns): numSamples(ns), inputDim(idim), outputDim(odim), sigmoid(true), eval(false), doneFwd(false){ + //Malloc Weights, Biases, in and out + cudaMalloc((void**)&W, idim * odim * sizeof(float)); checkCUDAError("cuda Malloc W failed"); - cudaMalloc(&b, odim * sizeof(float)); + cudaMalloc((void**)&b, odim * sizeof(float)); checkCUDAError("cuda Malloc b failed"); + cudaMalloc((void**)&dev_in, inputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_in in failed"); + cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_out in failed"); //Call Initializer Kernels dim3 fullBlocksPerGrid((inputDim * outputDim + blockSize - 1) / blockSize); kernInitWeightsBias<<>>(W, b, inputDim, outputDim); } + 
void AffineLayer::initWeights() { + float* temp = new float[inputDim * outputDim]; + for (int i = 0; i < inputDim * outputDim; ++i) { + temp[i] = i * 0.01; + } + cudaMemcpy(W, temp, inputDim * outputDim * sizeof(float), cudaMemcpyHostToDevice); + } + + void AffineLayer::initBias() { + float* tempb = new float[outputDim]; + for (int i = 0; i < outputDim; ++i) { + tempb[i] = i * 0.01; + } + cudaMemcpy(b, tempb, outputDim * sizeof(float), cudaMemcpyHostToDevice); + } + void AffineLayer::setSigmoid(bool state) { sigmoid = state; } @@ -141,13 +161,6 @@ namespace CharacterRecognition { /*Uses W & b to perform forward pass on an Affine Layer Assumes dev_input is set (on GPU), numSamples is set and eval is set */ - //Malloc the input matrix and an output matrix - numSamples = ns; - cudaMalloc((void**)&dev_in, inputDim * numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_in in failed"); - cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_out in failed"); - //Memcpy the *in information into dev_in cudaMemcpy(dev_in, in, inputDim * numSamples * sizeof(float), cudaMemcpyHostToDevice); @@ -160,7 +173,6 @@ namespace CharacterRecognition { cudaMemcpy(out, dev_out, outputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); //free (dont free dev_in because you'll need it for backprop) - cudaFree(&dev_out); return out; } @@ -206,4 +218,66 @@ namespace CharacterRecognition { cudaFree(dev_din); return din; } + + void printFloatArray(float *x, int n) { + printf(" [ "); + for (int i = 0; i < n; i++) { + printf("%f ", x[i]); + } + printf("]\n"); + } + + void charRegTests() { + //Network Structure + int numSamples = 4; + int inputDim = 2; + int hiddenDim[1] = { 3 }; + int outputDim = 2; + + //XOR Input Array + float *x = new float[numSamples * inputDim]; + for (int i = 0; i < numSamples * inputDim; ++i) { + if (i % 2 == 0) { + x[i] = 1; + } + else { + x[i] = 0; + } + } + printFloatArray(x, numSamples * inputDim); + + 
//Build Layers + AffineLayer* layer1 = new AffineLayer(inputDim, hiddenDim[0], numSamples); + layer1->setSigmoid(false); + AffineLayer* layer1copy = new AffineLayer(inputDim, hiddenDim[0], numSamples); + layer1copy->setSigmoid(false); + //CharacterRecognition::AffineLayer layer2(hiddenDim[0], outputDim); + //layer2.setSigmoid(false); + + /* FORWARD PROP */ + float *out0, *out1; + out0 = layer1->forward(x, numSamples); + printFloatArray(out0, numSamples * outputDim); + printFloatArray(x, numSamples * inputDim); + out1 = layer1copy->forward(x, numSamples); + printFloatArray(out1, numSamples * outputDim); + /* + out1 = layer2.forward(out0, numSamples); + printFloatArray(out1, numSamples * outputDim); + */ + + /* CALCULATE LOSS */ + + /* BACKWARD PROP */ + float* din; + } + + float softmax_loss(float *pred, float *target, float *dout) { + /* Returns a float representing the loss, and updates dout + pred: Shape numSamples x outputDim + target: Shape numSamples + dout: Each element + */ + return 0.0; + } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index be0f7b4..ac6c992 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -6,24 +6,26 @@ namespace CharacterRecognition { Common::PerformanceTimer& timer(); class AffineLayer{ - float *W; - float *b; float *dev_in; float *dev_out; float *dev_dout; float *dev_doutLinear; float *dev_din; + float *W; + float *b; int numSamples; int inputDim, outputDim; bool sigmoid; bool eval; bool doneFwd; public: - AffineLayer(int idim, int odim); + AffineLayer(int idim, int odim, int ns); float* forward(float *in, int num_samples); float* backward(float *dout, float lr); void setEval(bool state); void setSigmoid(bool state); + void initWeights(); + void initBias(); char* getType(); }; @@ -33,6 +35,6 @@ namespace CharacterRecognition { FCN(int inputDim, int outputDim, 
int numHiddenLayers, int *hiddenDims); void forward(float *input, float *ouput, bool eval); }; - // TODO: implement required elements for MLP sections 1 and 2 here + void charRegTests(); } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 2db8e36..5ac9ccf 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -34,32 +34,7 @@ int main(int argc, char* argv[]) { printf("** CHARACTER RECOGNITION TESTS **\n"); printf("****************\n"); - //XOR Input Array - int numSamples = 4; - int inputDim = 2; - int outputDim = 3; - float *x = new float[numSamples * inputDim]; - for (int i = 0; i < SIZE; ++i) { - if (i % 2 == 0) { - x[i] = 1; - } - else { - x[i] = 0; - } - } - printFloatArray(x, numSamples * inputDim); - - //Build Layers - float *out; - CharacterRecognition::AffineLayer layer1(inputDim, outputDim); - layer1.setSigmoid(false); - - /* FORWARD PROP */ - out = layer1.forward(x, numSamples); - printFloatArray(out, numSamples * outputDim); - - /* BACKWARD PROP */ - + CharacterRecognition::charRegTests(); system("pause"); // stop Win32 console from closing on exit delete[] a; From e253108e80bf2d71cb2201901954e2f587d1c888 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 10:40:29 -0700 Subject: [PATCH 06/48] everything fixed forward --- .../character_recognition/mlp.cu | 35 +++++-------------- .../character_recognition/mlp.h | 4 +-- Project2-Character-Recognition/src/main.cpp | 2 +- 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index a422571..436bac5 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -87,8 +87,8 @@ namespace CharacterRecognition { for (int i = 0; i < outputDim; i++) { val += doutLinear[row * outputDim + 
i] * W[col * outputDim + i]; } + din[row * inputDim + col] = val; } - din[row * inputDim + col] = val; } __global__ void kern_dW(float *W, float *b, float *doutLinear, float *in, int inputDim, int outputDim, int numSamples, float lr) { @@ -113,9 +113,9 @@ namespace CharacterRecognition { val += in[row * inputDim + i] * doutLinearIdx; dbval += doutLinearIdx; } + W[row * outputDim + col] = currW - lr * (val); + b[col] = currb - lr * (dbval); } - W[row * outputDim + col] = currW - lr * (val); - b[col] = currb - lr * (dbval); } //AffineLayer @@ -134,21 +134,6 @@ namespace CharacterRecognition { dim3 fullBlocksPerGrid((inputDim * outputDim + blockSize - 1) / blockSize); kernInitWeightsBias<<>>(W, b, inputDim, outputDim); } - void AffineLayer::initWeights() { - float* temp = new float[inputDim * outputDim]; - for (int i = 0; i < inputDim * outputDim; ++i) { - temp[i] = i * 0.01; - } - cudaMemcpy(W, temp, inputDim * outputDim * sizeof(float), cudaMemcpyHostToDevice); - } - - void AffineLayer::initBias() { - float* tempb = new float[outputDim]; - for (int i = 0; i < outputDim; ++i) { - tempb[i] = i * 0.01; - } - cudaMemcpy(b, tempb, outputDim * sizeof(float), cudaMemcpyHostToDevice); - } void AffineLayer::setSigmoid(bool state) { sigmoid = state; @@ -173,6 +158,7 @@ namespace CharacterRecognition { cudaMemcpy(out, dev_out, outputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); //free (dont free dev_in because you'll need it for backprop) + cudaFree(dev_out); return out; } @@ -215,6 +201,7 @@ namespace CharacterRecognition { float *din = new float[outputDim * numSamples]; cudaMemcpy(dev_din, din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); checkCUDAError("cuda Memcpy din in failed"); + cudaFree(dev_din); return din; } @@ -227,7 +214,7 @@ namespace CharacterRecognition { printf("]\n"); } - void charRegTests() { + void XORTest() { //Network Structure int numSamples = 4; int inputDim = 2; @@ -251,20 +238,16 @@ namespace CharacterRecognition { 
layer1->setSigmoid(false); AffineLayer* layer1copy = new AffineLayer(inputDim, hiddenDim[0], numSamples); layer1copy->setSigmoid(false); - //CharacterRecognition::AffineLayer layer2(hiddenDim[0], outputDim); - //layer2.setSigmoid(false); + AffineLayer* layer2 = new AffineLayer(hiddenDim[0], outputDim, numSamples); + layer2->setSigmoid(false); /* FORWARD PROP */ float *out0, *out1; out0 = layer1->forward(x, numSamples); printFloatArray(out0, numSamples * outputDim); printFloatArray(x, numSamples * inputDim); - out1 = layer1copy->forward(x, numSamples); - printFloatArray(out1, numSamples * outputDim); - /* - out1 = layer2.forward(out0, numSamples); + out1 = layer2->forward(out0, numSamples); printFloatArray(out1, numSamples * outputDim); - */ /* CALCULATE LOSS */ diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index ac6c992..a19ab1f 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -24,8 +24,6 @@ namespace CharacterRecognition { float* backward(float *dout, float lr); void setEval(bool state); void setSigmoid(bool state); - void initWeights(); - void initBias(); char* getType(); }; @@ -36,5 +34,5 @@ namespace CharacterRecognition { void forward(float *input, float *ouput, bool eval); }; // TODO: implement required elements for MLP sections 1 and 2 here - void charRegTests(); + void XORTest(); } diff --git a/Project2-Character-Recognition/src/main.cpp b/Project2-Character-Recognition/src/main.cpp index 5ac9ccf..5e2536f 100644 --- a/Project2-Character-Recognition/src/main.cpp +++ b/Project2-Character-Recognition/src/main.cpp @@ -34,7 +34,7 @@ int main(int argc, char* argv[]) { printf("** CHARACTER RECOGNITION TESTS **\n"); printf("****************\n"); - CharacterRecognition::charRegTests(); + CharacterRecognition::XORTest(); system("pause"); // stop Win32 console from closing on exit delete[] a; 
From b47afabdaa29cd8d07925fb83b91583c8c55bc72 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 11:55:58 -0700 Subject: [PATCH 07/48] still trying to figure out loss calculation --- .../character_recognition/mlp.cu | 61 ++++++++++++++++++- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 436bac5..30201c1 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -118,6 +118,32 @@ namespace CharacterRecognition { } } + __global__ void kernStableSoftmax(float *pred, float *pred2, float *target, int *sums, int numSamples, int outputDim) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int row = index / outputDim; + float rowval = 0.0; + if (index < numSamples * outputDim) { + for (int i = 0; i < outputDim; i++) { + rowval += pred2[row * outputDim + i]; + } + sums[row] = rowval; + pred[index] = expf(pred2[index]); + pred[index] = pred2[index] / rowval; + } + } + + __global__ void kernSums(float *pred, int *sums, int numSamples, int outputDim) { + int index = blockIdx.x * blockDim.x + threadIdx.x; + int row = index / outputDim; + float rowval = 0.0; + if (index < numSamples * outputDim) { + for (int i = 0; i < outputDim; i++) { + rowval += pred[row * outputDim + i]; + } + sums[row] = rowval; + } + } + //AffineLayer AffineLayer::AffineLayer(int idim, int odim, int ns): numSamples(ns), inputDim(idim), outputDim(odim), sigmoid(true), eval(false), doneFwd(false){ //Malloc Weights, Biases, in and out @@ -221,8 +247,9 @@ namespace CharacterRecognition { int hiddenDim[1] = { 3 }; int outputDim = 2; - //XOR Input Array + //XOR Input Array and Target Array float *x = new float[numSamples * inputDim]; + float *target = new float[numSamples * outputDim]; for (int i = 0; i < numSamples * inputDim; ++i) { if (i % 2 == 0) { x[i] = 1; @@ -231,6 
+258,9 @@ namespace CharacterRecognition { x[i] = 0; } } + for (int i = 0; i < numSamples * outputDim; ++i) { + target[i] = 1; + } printFloatArray(x, numSamples * inputDim); //Build Layers @@ -251,16 +281,43 @@ namespace CharacterRecognition { /* CALCULATE LOSS */ + /* BACKWARD PROP */ float* din; } - float softmax_loss(float *pred, float *target, float *dout) { + float softmax_loss(float *pred, float *target, float *dout, int numSamples, int outputDim) { /* Returns a float representing the loss, and updates dout pred: Shape numSamples x outputDim target: Shape numSamples dout: Each element */ + //Alloc and copy predicted + float *dev_pred; + float *dev_pred2; + cudaMalloc((void**)&dev_pred, numSamples * outputDim * sizeof(float)); + checkCUDAError("cuda Malloc dev_pred failed"); + cudaMalloc((void**)&dev_pred2, numSamples * outputDim * sizeof(float)); + checkCUDAError("cuda Malloc dev_pred2 failed"); + cudaMemcpy(dev_pred, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dev_pred2, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); + + //Alloc and copy predicted + float *dev_sum; + cudaMalloc((void**)&dev_sum, numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_sum failed"); + + //Alloc and copy Target + float *dev_target; + cudaMalloc((void**)&dev_target, numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_target failed"); + cudaMemcpy(dev_target, target, numSamples * sizeof(float), cudaMemcpyHostToDevice); + + //Apply Softmax to pred + dim3 outputGrid = (numSamples * outputDim + blockSize - 1) / blockSize; + kernSums << > > (float *pred, int *sums, int numSamples, int outputDim); + kernStableSoftmax << > >(dev_pred, dev_pred2, dev_sum, numSamples, outputDim); + kernCrossEntropy<<>>(dev_pred, ) return 0.0; } } From f31780e5ee861a3acc9df293e89f6a4bdfe29c71 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 14:43:49 -0700 Subject: [PATCH 08/48] one level backprop works fine --- 
.../character_recognition/mlp.cu | 136 ++++++++++++------ .../character_recognition/mlp.h | 6 +- 2 files changed, 96 insertions(+), 46 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 30201c1..6d3de57 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -156,6 +156,7 @@ namespace CharacterRecognition { cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); checkCUDAError("cuda Malloc dev_out in failed"); + //Call Initializer Kernels dim3 fullBlocksPerGrid((inputDim * outputDim + blockSize - 1) / blockSize); kernInitWeightsBias<<>>(W, b, inputDim, outputDim); @@ -195,12 +196,11 @@ namespace CharacterRecognition { Returns */ //Malloc the input matrix and an output matrix + float *dev_dout, *dev_din, *dev_doutLinear; cudaMalloc((void**)&dev_dout, outputDim * numSamples * sizeof(float)); checkCUDAError("cuda Malloc dev_dout in failed"); cudaMalloc((void**)&dev_din, inputDim * numSamples * sizeof(float)); checkCUDAError("cuda Malloc dev_din in failed"); - cudaMalloc((void**)&dev_doutLinear, outputDim * numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_din in failed"); //Memcpy the *dout information into dev_dout cudaMemcpy(dev_dout, dout, outputDim * numSamples * sizeof(float), cudaMemcpyHostToDevice); @@ -212,10 +212,15 @@ namespace CharacterRecognition { dim3 inputGrid = ((numSamples * inputDim + blockSize - 1) / blockSize); if (sigmoid) { + cudaMalloc((void**)&dev_doutLinear, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_din in failed"); //Get derivative of softmax, and update kern_dSigmoid<<>>(dev_dout, dev_doutLinear, inputDim, outputDim); cudaFree(&dev_dout); } + else { + dev_doutLinear = dev_dout; + } //Use matrix to compute dIn kern_dIn<<>>(dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); @@ -224,10 
+229,9 @@ namespace CharacterRecognition { kern_dW<<>>(W, b, dev_doutLinear, dev_in, inputDim, outputDim, numSamples, lr); //Memcpy back the din info - float *din = new float[outputDim * numSamples]; - cudaMemcpy(dev_din, din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); + float *din = new float[inputDim * numSamples]; + cudaMemcpy(din, dev_din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); checkCUDAError("cuda Memcpy din in failed"); - cudaFree(dev_din); return din; } @@ -240,6 +244,82 @@ namespace CharacterRecognition { printf("]\n"); } + void cpu_softmax(float *pred, int numSamples, int outputDim) { + float rowSum = 0; + for (int i = 0; i < numSamples; ++i) { + rowSum = 0; + for (int j = 0; j < outputDim; ++j) { + pred[i * outputDim + j] = exp(pred[i * outputDim + j]); + rowSum += pred[i * outputDim + j]; + } + for (int k = 0; k < outputDim; ++k){ + pred[i * outputDim + k] /= rowSum; + } + } + } + + float cpu_crossEntropy(float *pred, float *target, int numSamples, int outputDim, float* dout){ + float* log_likelihood = new float[numSamples]; + float llsum = 0; + for (int i = 0; i < numSamples; ++i) { + for (int c = 0; c < outputDim; ++c) { + float ting = pred[i * outputDim + c]; + dout[i * outputDim + c] = ting; + } + } + + for (int i = 0; i < numSamples; ++i) { + int offset = target[i]; + float ting = pred[i * outputDim + offset]; + log_likelihood[i] = -log(ting); + llsum += -log(ting); + dout[i * outputDim + offset] -= 1; + for (int c = 0; c < outputDim; ++c) { + dout[i * outputDim + c] /= numSamples; + } + } + return llsum / numSamples; + } + + float softmax_loss(float *pred, float *target, float *dout, int numSamples, int outputDim) { + /* Returns a float representing the loss, and updates dout + pred: Shape numSamples x outputDim + target: Shape numSamples + dout: Each element + */ + //Alloc and copy predicted + float *dev_pred; + float *dev_pred2; + cudaMalloc((void**)&dev_pred, numSamples * outputDim * sizeof(float)); 
+ checkCUDAError("cuda Malloc dev_pred failed"); + cudaMalloc((void**)&dev_pred2, numSamples * outputDim * sizeof(float)); + checkCUDAError("cuda Malloc dev_pred2 failed"); + cudaMemcpy(dev_pred, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); + cudaMemcpy(dev_pred2, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); + + //Alloc and copy predicted + float *dev_sum; + cudaMalloc((void**)&dev_sum, numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_sum failed"); + + //Alloc and copy Target + float *dev_target; + cudaMalloc((void**)&dev_target, numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_target failed"); + cudaMemcpy(dev_target, target, numSamples * sizeof(float), cudaMemcpyHostToDevice); + printf("PRED INSIDE B4\n"); + printFloatArray(pred, numSamples * outputDim); + + //Apply Softmax to pred + cpu_softmax(pred, numSamples, outputDim); + + printf("PRED INSIDE AFTER\n"); + printFloatArray(pred, numSamples * outputDim); + + float loss = cpu_crossEntropy(pred, target, numSamples, outputDim, dout); + return loss; + } + void XORTest() { //Network Structure int numSamples = 4; @@ -262,6 +342,7 @@ namespace CharacterRecognition { target[i] = 1; } printFloatArray(x, numSamples * inputDim); + printFloatArray(target, numSamples); //Build Layers AffineLayer* layer1 = new AffineLayer(inputDim, hiddenDim[0], numSamples); @@ -275,49 +356,18 @@ namespace CharacterRecognition { float *out0, *out1; out0 = layer1->forward(x, numSamples); printFloatArray(out0, numSamples * outputDim); - printFloatArray(x, numSamples * inputDim); out1 = layer2->forward(out0, numSamples); printFloatArray(out1, numSamples * outputDim); /* CALCULATE LOSS */ - + float *dout = new float[outputDim * numSamples]; + float loss = softmax_loss(out1, target, dout, numSamples, outputDim); + printf("LOSS:%f\n", loss); + printFloatArray(dout, outputDim * numSamples); /* BACKWARD PROP */ - float* din; - } - - float softmax_loss(float *pred, float 
*target, float *dout, int numSamples, int outputDim) { - /* Returns a float representing the loss, and updates dout - pred: Shape numSamples x outputDim - target: Shape numSamples - dout: Each element - */ - //Alloc and copy predicted - float *dev_pred; - float *dev_pred2; - cudaMalloc((void**)&dev_pred, numSamples * outputDim * sizeof(float)); - checkCUDAError("cuda Malloc dev_pred failed"); - cudaMalloc((void**)&dev_pred2, numSamples * outputDim * sizeof(float)); - checkCUDAError("cuda Malloc dev_pred2 failed"); - cudaMemcpy(dev_pred, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(dev_pred2, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); - - //Alloc and copy predicted - float *dev_sum; - cudaMalloc((void**)&dev_sum, numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_sum failed"); - - //Alloc and copy Target - float *dev_target; - cudaMalloc((void**)&dev_target, numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_target failed"); - cudaMemcpy(dev_target, target, numSamples * sizeof(float), cudaMemcpyHostToDevice); - - //Apply Softmax to pred - dim3 outputGrid = (numSamples * outputDim + blockSize - 1) / blockSize; - kernSums << > > (float *pred, int *sums, int numSamples, int outputDim); - kernStableSoftmax << > >(dev_pred, dev_pred2, dev_sum, numSamples, outputDim); - kernCrossEntropy<<>>(dev_pred, ) - return 0.0; + float* dout1, dout0; + dout1 = layer2->backward(dout, 0.0); + printFloatArray(dout1, inputDim * numSamples); } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index a19ab1f..178a38f 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -8,9 +8,6 @@ namespace CharacterRecognition { class AffineLayer{ float *dev_in; float *dev_out; - float *dev_dout; - float *dev_doutLinear; - float *dev_din; float 
*W; float *b; int numSamples; @@ -24,6 +21,9 @@ namespace CharacterRecognition { float* backward(float *dout, float lr); void setEval(bool state); void setSigmoid(bool state); + float softmax_loss(float *pred, float *target, float *dout, int numSamples, int outputDim); + void cpu_softmax(float *pred, int numSamples, int outputDim); + float cpu_crossEntropy(float *pred, float *target, int numSamples, int outputDim, float* dout); char* getType(); }; From cfea64175e77ab22bfc533783e18e50307684de0 Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 16:11:54 -0700 Subject: [PATCH 09/48] MLP done --- .../character_recognition/mlp.cu | 141 +++++++++--------- .../character_recognition/mlp.h | 1 - 2 files changed, 68 insertions(+), 74 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 6d3de57..94942b2 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -12,6 +12,14 @@ namespace CharacterRecognition { static PerformanceTimer timer; return timer; } + + void printFloatArray(float *x, int n) { + printf(" [ "); + for (int i = 0; i < n; i++) { + printf("%f ", x[i]); + } + printf("]\n"); + } __host__ __device__ unsigned int hash(unsigned int a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); @@ -29,8 +37,8 @@ namespace CharacterRecognition { } thrust::default_random_engine rng(hash((int)(index * inputDim * outputDim))); thrust::uniform_real_distribution dist(0.0, 1.0); - //W[index] = dist(rng); - W[index] = 0.1 * index; + W[index] = dist(rng); + //W[index] = 0.1 * index; int y = index / outputDim; b[y] = 0; } @@ -110,7 +118,7 @@ namespace CharacterRecognition { if (row < inputDim && col < outputDim) { for (int i = 0; i < numSamples; i++) { doutLinearIdx = doutLinear[i * outputDim + col]; - val += in[row * inputDim + i] * doutLinearIdx; + val += in[i * inputDim 
+ row] * doutLinearIdx; dbval += doutLinearIdx; } W[row * outputDim + col] = currW - lr * (val); @@ -153,9 +161,6 @@ namespace CharacterRecognition { checkCUDAError("cuda Malloc b failed"); cudaMalloc((void**)&dev_in, inputDim * numSamples * sizeof(float)); checkCUDAError("cuda Malloc dev_in in failed"); - cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_out in failed"); - //Call Initializer Kernels dim3 fullBlocksPerGrid((inputDim * outputDim + blockSize - 1) / blockSize); @@ -173,6 +178,10 @@ namespace CharacterRecognition { /*Uses W & b to perform forward pass on an Affine Layer Assumes dev_input is set (on GPU), numSamples is set and eval is set */ + float *dev_out; + cudaMalloc((void**)&dev_out, outputDim * numSamples * sizeof(float)); + checkCUDAError("cuda Malloc dev_out in failed"); + //Memcpy the *in information into dev_in cudaMemcpy(dev_in, in, inputDim * numSamples * sizeof(float), cudaMemcpyHostToDevice); @@ -216,7 +225,6 @@ namespace CharacterRecognition { checkCUDAError("cuda Malloc dev_din in failed"); //Get derivative of softmax, and update kern_dSigmoid<<>>(dev_dout, dev_doutLinear, inputDim, outputDim); - cudaFree(&dev_dout); } else { dev_doutLinear = dev_dout; @@ -225,25 +233,29 @@ namespace CharacterRecognition { //Use matrix to compute dIn kern_dIn<<>>(dev_doutLinear, W, dev_din, inputDim, outputDim, numSamples); + //Update dw and db kern_dW<<>>(W, b, dev_doutLinear, dev_in, inputDim, outputDim, numSamples, lr); + //DEBUG STUFF + float *myW= new float[inputDim * outputDim]; + cudaMemcpy(myW, W, inputDim * outputDim * sizeof(float), cudaMemcpyDeviceToHost); + printf("MY WSTARTS\n"); + printFloatArray(myW, inputDim * outputDim); + printf("MY WENDS\n"); + //Memcpy back the din info float *din = new float[inputDim * numSamples]; cudaMemcpy(din, dev_din, inputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); checkCUDAError("cuda Memcpy din in failed"); + + + //Free Mems + 
cudaFree(dev_doutLinear); cudaFree(dev_din); return din; } - void printFloatArray(float *x, int n) { - printf(" [ "); - for (int i = 0; i < n; i++) { - printf("%f ", x[i]); - } - printf("]\n"); - } - void cpu_softmax(float *pred, int numSamples, int outputDim) { float rowSum = 0; for (int i = 0; i < numSamples; ++i) { @@ -287,35 +299,10 @@ namespace CharacterRecognition { target: Shape numSamples dout: Each element */ - //Alloc and copy predicted - float *dev_pred; - float *dev_pred2; - cudaMalloc((void**)&dev_pred, numSamples * outputDim * sizeof(float)); - checkCUDAError("cuda Malloc dev_pred failed"); - cudaMalloc((void**)&dev_pred2, numSamples * outputDim * sizeof(float)); - checkCUDAError("cuda Malloc dev_pred2 failed"); - cudaMemcpy(dev_pred, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(dev_pred2, pred, numSamples * outputDim * sizeof(float), cudaMemcpyHostToDevice); - - //Alloc and copy predicted - float *dev_sum; - cudaMalloc((void**)&dev_sum, numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_sum failed"); - - //Alloc and copy Target - float *dev_target; - cudaMalloc((void**)&dev_target, numSamples * sizeof(float)); - checkCUDAError("cuda Malloc dev_target failed"); - cudaMemcpy(dev_target, target, numSamples * sizeof(float), cudaMemcpyHostToDevice); - printf("PRED INSIDE B4\n"); - printFloatArray(pred, numSamples * outputDim); //Apply Softmax to pred cpu_softmax(pred, numSamples, outputDim); - printf("PRED INSIDE AFTER\n"); - printFloatArray(pred, numSamples * outputDim); - float loss = cpu_crossEntropy(pred, target, numSamples, outputDim, dout); return loss; } @@ -330,44 +317,52 @@ namespace CharacterRecognition { //XOR Input Array and Target Array float *x = new float[numSamples * inputDim]; float *target = new float[numSamples * outputDim]; - for (int i = 0; i < numSamples * inputDim; ++i) { - if (i % 2 == 0) { - x[i] = 1; - } - else { - x[i] = 0; - } - } - for (int i = 0; i < numSamples * outputDim; 
++i) { - target[i] = 1; - } - printFloatArray(x, numSamples * inputDim); - printFloatArray(target, numSamples); + x[0] = 0; + x[1] = 0; + target[0] = 0; + x[2] = 0; + x[3] = 1; + target[1] = 1; + x[4] = 1; + x[5] = 0; + target[2] = 1; + x[6] = 1; + x[7] = 1; + target[3] = 0; //Build Layers AffineLayer* layer1 = new AffineLayer(inputDim, hiddenDim[0], numSamples); - layer1->setSigmoid(false); + //layer1->setSigmoid(false); AffineLayer* layer1copy = new AffineLayer(inputDim, hiddenDim[0], numSamples); - layer1copy->setSigmoid(false); + //layer1copy->setSigmoid(false); AffineLayer* layer2 = new AffineLayer(hiddenDim[0], outputDim, numSamples); layer2->setSigmoid(false); - - /* FORWARD PROP */ - float *out0, *out1; - out0 = layer1->forward(x, numSamples); - printFloatArray(out0, numSamples * outputDim); - out1 = layer2->forward(out0, numSamples); - printFloatArray(out1, numSamples * outputDim); - - /* CALCULATE LOSS */ - float *dout = new float[outputDim * numSamples]; - float loss = softmax_loss(out1, target, dout, numSamples, outputDim); - printf("LOSS:%f\n", loss); - printFloatArray(dout, outputDim * numSamples); - - /* BACKWARD PROP */ - float* dout1, dout0; - dout1 = layer2->backward(dout, 0.0); - printFloatArray(dout1, inputDim * numSamples); + float lr = 0.01; + for (int l = 0; l < 100; ++l) { + /* FORWARD PROP */ + float *out0, *out1; + printf("IN\n"); + printFloatArray(x, numSamples * outputDim); + out0 = layer1->forward(x, numSamples); + printf("OUT0\n"); + printFloatArray(out0, numSamples * outputDim); + out1 = layer2->forward(out0, numSamples); + printf("OUT1\n"); + printFloatArray(out1, numSamples * outputDim); + + /* CALCULATE LOSS */ + float *dout = new float[outputDim * numSamples]; + float loss = softmax_loss(out1, target, dout, numSamples, outputDim); + printf("LOSS:%f\n", loss); + printFloatArray(dout, outputDim * numSamples); + + /* BACKWARD PROP */ + float *dout1, *dout0; + dout1 = layer2->backward(dout, lr); + dout0 = layer1->backward(dout1, lr); 
+ printf("DOUT0\n"); + printFloatArray(dout0, inputDim * numSamples); + printf("======================================\n", loss); + } } } diff --git a/Project2-Character-Recognition/character_recognition/mlp.h b/Project2-Character-Recognition/character_recognition/mlp.h index 178a38f..7f75d54 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.h +++ b/Project2-Character-Recognition/character_recognition/mlp.h @@ -7,7 +7,6 @@ namespace CharacterRecognition { Common::PerformanceTimer& timer(); class AffineLayer{ float *dev_in; - float *dev_out; float *W; float *b; int numSamples; From 9047da1ac57d8beb963249d446a68992e24d277e Mon Sep 17 00:00:00 2001 From: botforge Date: Sat, 21 Sep 2019 17:46:59 -0700 Subject: [PATCH 10/48] modular layers done --- .../character_recognition/mlp.cu | 58 +++++++++---------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 94942b2..91c7801 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -309,9 +309,10 @@ namespace CharacterRecognition { void XORTest() { //Network Structure - int numSamples = 4; + int numSamples = 2; int inputDim = 2; - int hiddenDim[1] = { 3 }; + int hiddenDim[3] = {3}; + int numLayers = 3; int outputDim = 2; //XOR Input Array and Target Array @@ -323,45 +324,44 @@ namespace CharacterRecognition { x[2] = 0; x[3] = 1; target[1] = 1; + /* x[4] = 1; x[5] = 0; target[2] = 1; x[6] = 1; x[7] = 1; target[3] = 0; + */ //Build Layers - AffineLayer* layer1 = new AffineLayer(inputDim, hiddenDim[0], numSamples); - //layer1->setSigmoid(false); - AffineLayer* layer1copy = new AffineLayer(inputDim, hiddenDim[0], numSamples); - //layer1copy->setSigmoid(false); - AffineLayer* layer2 = new AffineLayer(hiddenDim[0], outputDim, numSamples); - layer2->setSigmoid(false); - float lr = 0.01; - 
for (int l = 0; l < 100; ++l) { - /* FORWARD PROP */ - float *out0, *out1; - printf("IN\n"); - printFloatArray(x, numSamples * outputDim); - out0 = layer1->forward(x, numSamples); - printf("OUT0\n"); - printFloatArray(out0, numSamples * outputDim); - out1 = layer2->forward(out0, numSamples); - printf("OUT1\n"); - printFloatArray(out1, numSamples * outputDim); - - /* CALCULATE LOSS */ + std::vector layers; + layers.push_back(new AffineLayer(inputDim, hiddenDim[0], numSamples)); + for (int l = 1; l < numLayers; ++l) { + AffineLayer* currLayer = new AffineLayer(hiddenDim[l - 1], hiddenDim[l], numSamples); + layers.push_back(currLayer); + } + layers.push_back(new AffineLayer(hiddenDim[numLayers-1], outputDim, numSamples)); + layers[layers.size() - 1]->setSigmoid(false); + + float lr = 0.7; + for (int k = 0; k < 100; ++k) { + //FORWARD PROP + float *out; + out = x; + for (int c = 0; c < layers.size(); ++c) { + out = layers[c]->forward(out, numSamples); + } + + //CALCULATE LOSS float *dout = new float[outputDim * numSamples]; - float loss = softmax_loss(out1, target, dout, numSamples, outputDim); + float loss = softmax_loss(out, target, dout, numSamples, outputDim); printf("LOSS:%f\n", loss); printFloatArray(dout, outputDim * numSamples); - /* BACKWARD PROP */ - float *dout1, *dout0; - dout1 = layer2->backward(dout, lr); - dout0 = layer1->backward(dout1, lr); - printf("DOUT0\n"); - printFloatArray(dout0, inputDim * numSamples); + //BACKWARD PROP + for (int v = layers.size() - 1; v >= 0; v--) { + dout = layers[v]->backward(dout, lr); + } printf("======================================\n", loss); } } From c4600066c77a5b4774128468b6867c8b21f995f1 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 08:37:27 -0700 Subject: [PATCH 11/48] going to try messing with loss one sec --- .../character_recognition/mlp.cu | 97 +++++++++++++------ 1 file changed, 69 insertions(+), 28 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu 
b/Project2-Character-Recognition/character_recognition/mlp.cu index 91c7801..0612c0f 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -5,6 +5,10 @@ #include #define blockSize 512 +#define NUM_ITERS 100 +#define LEARNING_RATE 0.1 +#define FULLBATCH 0 + namespace CharacterRecognition { using Common::PerformanceTimer; PerformanceTimer& timer() @@ -35,7 +39,7 @@ namespace CharacterRecognition { if (index >= inputDim * outputDim) { return; } - thrust::default_random_engine rng(hash((int)(index * inputDim * outputDim))); + thrust::default_random_engine rng(hash((int)(index * inputDim * outputDim + W[0]))); thrust::uniform_real_distribution dist(0.0, 1.0); W[index] = dist(rng); //W[index] = 0.1 * index; @@ -58,13 +62,13 @@ namespace CharacterRecognition { for (int i = 0; i < inputDim; i++) { val += in[row * inputDim + i] * W[i * outputDim + col]; } - val += b[row]; - out[row * outputDim + col] = sigmoid ? 1/(1+__expf(-val)) : val; + val += b[col]; + out[row * outputDim + col] = sigmoid ? 
1.0/(1+expf(-1.0*val)) : val; } } __device__ float applySigmoid(float x) { - return 1 / (1 + __expf(-x)); + return 1 / (1 + expf(-x)); } __device__ float dSigmoid(float x) { @@ -193,6 +197,9 @@ namespace CharacterRecognition { float *out = new float[outputDim * numSamples]; cudaMemcpy(out, dev_out, outputDim * numSamples * sizeof(float), cudaMemcpyDeviceToHost); + printf("SCORES\n"); + printFloatArray(out, outputDim*numSamples); + //free (dont free dev_in because you'll need it for backprop) cudaFree(dev_out); return out; @@ -240,9 +247,15 @@ namespace CharacterRecognition { //DEBUG STUFF float *myW= new float[inputDim * outputDim]; cudaMemcpy(myW, W, inputDim * outputDim * sizeof(float), cudaMemcpyDeviceToHost); - printf("MY WSTARTS\n"); + printf("BACKPROP:WSTARTS\n"); printFloatArray(myW, inputDim * outputDim); - printf("MY WENDS\n"); + printf("BACKPROP:MY WENDS\n"); + + float *myb= new float[outputDim]; + cudaMemcpy(myb, b, outputDim * sizeof(float), cudaMemcpyDeviceToHost); + printf("BACKPROP:MY bSTARTS\n"); + printFloatArray(myb, outputDim); + printf("BACKPROP:MY bENDS\n"); //Memcpy back the din info float *din = new float[inputDim * numSamples]; @@ -268,6 +281,8 @@ namespace CharacterRecognition { pred[i * outputDim + k] /= rowSum; } } + printf("PRED-PROBABILITIES\n"); + printFloatArray(pred, outputDim); } float cpu_crossEntropy(float *pred, float *target, int numSamples, int outputDim, float* dout){ @@ -307,44 +322,70 @@ namespace CharacterRecognition { return loss; } + void getXORSample(int idx, float *x, float *target) { + if (FULLBATCH) { + x[0] = 0; + x[1] = 0; + target[0] = 0; + x[2] = 0; + x[3] = 1; + target[1] = 1; + x[4] = 1; + x[5] = 0; + target[2] = 1; + x[6] = 1; + x[7] = 1; + target[3] = 0; + } + if (idx % 2 == 0) { + x[0] = 1; + x[1] = 1; + target[0] = 0; + } + else if (idx % 1 == 0) { + x[0] = 1; + x[1] = 0; + target[0] = 1; + } + else if (idx % 2 == 0) { + x[0] = 0; + x[1] = 0; + target[0] = 0; + } + else { + x[0] = 0; + x[1] = 1; + target[0] 
= 1; + } + } void XORTest() { //Network Structure - int numSamples = 2; + int numSamples = 1; int inputDim = 2; - int hiddenDim[3] = {3}; - int numLayers = 3; + int numLayers = 1; + int hiddenDim[1] = {3}; int outputDim = 2; //XOR Input Array and Target Array float *x = new float[numSamples * inputDim]; float *target = new float[numSamples * outputDim]; - x[0] = 0; - x[1] = 0; - target[0] = 0; - x[2] = 0; - x[3] = 1; - target[1] = 1; - /* - x[4] = 1; - x[5] = 0; - target[2] = 1; - x[6] = 1; - x[7] = 1; - target[3] = 0; - */ - //Build Layers std::vector layers; layers.push_back(new AffineLayer(inputDim, hiddenDim[0], numSamples)); for (int l = 1; l < numLayers; ++l) { AffineLayer* currLayer = new AffineLayer(hiddenDim[l - 1], hiddenDim[l], numSamples); + currLayer->setSigmoid(true); layers.push_back(currLayer); } layers.push_back(new AffineLayer(hiddenDim[numLayers-1], outputDim, numSamples)); layers[layers.size() - 1]->setSigmoid(false); - float lr = 0.7; - for (int k = 0; k < 100; ++k) { + for (int k = 0; k < NUM_ITERS; ++k) { + getXORSample(k, x, target); + printf("INPUT\n"); + printFloatArray(x, inputDim * numSamples); + printf("TARGET\n"); + printFloatArray(target, 1 * numSamples); //FORWARD PROP float *out; out = x; @@ -355,12 +396,12 @@ namespace CharacterRecognition { //CALCULATE LOSS float *dout = new float[outputDim * numSamples]; float loss = softmax_loss(out, target, dout, numSamples, outputDim); - printf("LOSS:%f\n", loss); + printf("LOSS BACKPROP:%f\n", loss); printFloatArray(dout, outputDim * numSamples); //BACKWARD PROP for (int v = layers.size() - 1; v >= 0; v--) { - dout = layers[v]->backward(dout, lr); + dout = layers[v]->backward(dout, LEARNING_RATE); } printf("======================================\n", loss); } From 1985afdd3b9cd76834af9a15712360e76e9916a2 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 09:58:50 -0700 Subject: [PATCH 12/48] just submit it --- .../character_recognition/mlp.cu | 21 +++++++++++-------- 1 file 
changed, 12 insertions(+), 9 deletions(-) diff --git a/Project2-Character-Recognition/character_recognition/mlp.cu b/Project2-Character-Recognition/character_recognition/mlp.cu index 0612c0f..663cc5d 100644 --- a/Project2-Character-Recognition/character_recognition/mlp.cu +++ b/Project2-Character-Recognition/character_recognition/mlp.cu @@ -5,7 +5,7 @@ #include #define blockSize 512 -#define NUM_ITERS 100 +#define NUM_ITERS 50 #define LEARNING_RATE 0.1 #define FULLBATCH 0 @@ -294,7 +294,9 @@ namespace CharacterRecognition { dout[i * outputDim + c] = ting; } } - + printf("DOUT\n"); + printFloatArray(dout, outputDim); + for (int i = 0; i < numSamples; ++i) { int offset = target[i]; float ting = pred[i * outputDim + offset]; @@ -337,20 +339,20 @@ namespace CharacterRecognition { x[7] = 1; target[3] = 0; } - if (idx % 2 == 0) { + if (idx % 4 == 0) { x[0] = 1; x[1] = 1; target[0] = 0; } - else if (idx % 1 == 0) { - x[0] = 1; + else if (idx % 3 == 0) { + x[0] = 0; x[1] = 0; - target[0] = 1; + target[0] = 0; } else if (idx % 2 == 0) { - x[0] = 0; + x[0] = 1; x[1] = 0; - target[0] = 0; + target[0] = 1; } else { x[0] = 0; @@ -358,12 +360,13 @@ namespace CharacterRecognition { target[0] = 1; } } + void XORTest() { //Network Structure int numSamples = 1; int inputDim = 2; int numLayers = 1; - int hiddenDim[1] = {3}; + int hiddenDim[1] = {5}; int outputDim = 2; //XOR Input Array and Target Array From 728e9b5b3f3aa3d9f15a4c68dfaacabf454ade07 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 15:26:48 -0700 Subject: [PATCH 13/48] almost finished with everything --- Project2-Stream-Compaction/src/main.cpp | 31 ++-- .../stream_compaction/CMakeLists.txt | 2 +- .../stream_compaction/cpu.cu | 71 +++++++- .../stream_compaction/efficient.cu | 169 ++++++++++++++++-- .../stream_compaction/naive.cu | 62 ++++++- 5 files changed, 304 insertions(+), 31 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 
d016553..23a95b9 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1<<3; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; @@ -27,8 +27,13 @@ int main(int argc, char* argv[]) { printf("** SCAN TESTS **\n"); printf("****************\n"); - genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; + genArray(SIZE, a, 50); // Leave a 0 at the end to test that edge case + a[SIZE - 1] = 0; + + for (int p = 0; p < SIZE; p++) { + a[p] = p; + } + printArray(SIZE, a, true); // initialize b using StreamCompaction::CPU::scan you implement @@ -50,8 +55,8 @@ int main(int argc, char* argv[]) { zeroArray(SIZE, c); printDesc("naive scan, power-of-two"); StreamCompaction::Naive::scan(SIZE, c, a); - printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); /* For bug-finding only: Array of 1s to help find bugs in stream compaction or scan @@ -59,28 +64,28 @@ int main(int argc, char* argv[]) { printDesc("1s array for finding bugs"); StreamCompaction::Naive::scan(SIZE, c, a); printArray(SIZE, c, true); */ - zeroArray(SIZE, c); printDesc("naive scan, non-power-of-two"); StreamCompaction::Naive::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Naive::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); StreamCompaction::Efficient::scan(SIZE, c, a); 
printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, non-power-of-two"); StreamCompaction::Efficient::scan(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(NPOT, c, true); + printArray(NPOT, c, true); printCmpResult(NPOT, b, c); + /* zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); StreamCompaction::Thrust::scan(SIZE, c, a); @@ -94,12 +99,12 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); //printArray(NPOT, c, true); printCmpResult(NPOT, b, c); + */ printf("\n"); printf("*****************************\n"); printf("** STREAM COMPACTION TESTS **\n"); printf("*****************************\n"); - // Compaction tests genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case @@ -125,7 +130,7 @@ int main(int argc, char* argv[]) { expectedNPOT = count; printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - + zeroArray(SIZE, c); printDesc("cpu compact with scan"); count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); @@ -140,13 +145,15 @@ int main(int argc, char* argv[]) { //printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); + + /* zeroArray(SIZE, c); printDesc("work-efficient compact, non-power-of-two"); count = StreamCompaction::Efficient::compact(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); //printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - + */ system("pause"); // stop Win32 console from closing on exit delete[] a; delete[] b; diff --git 
a/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt b/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt index cdbef77..185a604 100644 --- a/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt +++ b/Project2-Stream-Compaction/stream_compaction/CMakeLists.txt @@ -13,5 +13,5 @@ set(SOURCE_FILES cuda_add_library(stream_compaction ${SOURCE_FILES} - OPTIONS -arch=sm_20 + OPTIONS -arch=sm_75 ) diff --git a/Project2-Stream-Compaction/stream_compaction/cpu.cu b/Project2-Stream-Compaction/stream_compaction/cpu.cu index a2d3e6c..ac9089a 100644 --- a/Project2-Stream-Compaction/stream_compaction/cpu.cu +++ b/Project2-Stream-Compaction/stream_compaction/cpu.cu @@ -18,33 +18,92 @@ namespace StreamCompaction { * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function first. */ void scan(int n, int *odata, const int *idata) { + // idata: orig int array, odata: output int array, n is len(int array) timer().startCpuTimer(); - // TODO + odata[0] = 0; + for (int i = 1; i < n; i++) { + odata[i] = odata[i - 1] + idata[i-1]; //n-1 adds + } timer().endCpuTimer(); } + void scan_notimer(int n, int *odata, const int *idata) { + // idata: orig int array, odata: output int array, n is len(int array) + odata[0] = 0; + for (int i = 1; i < n; i++) { + odata[i] = odata[i - 1] + idata[i-1]; //n-1 adds + } + } + + /** * CPU stream compaction without using the scan function. * * @returns the number of elements remaining after compaction. 
*/ int compactWithoutScan(int n, int *odata, const int *idata) { + // idata: orig int array, odata: output int array, n is len(int array) timer().startCpuTimer(); - // TODO + int num_nonzeros = 0; + for (int i = 0; i < n; i++) { + int elt_i = idata[i]; + if (elt_i != 0) { + odata[num_nonzeros] = elt_i; + ++num_nonzeros; + } + } timer().endCpuTimer(); - return -1; + return num_nonzeros; } + void computeTemporaryArray(int n, int *tempArray, const int *idata) { + //Temporary array copies zeros & sets nonzeros to 1 + for (int i = 0; i < n; i++) { + if (idata[i] != 0) { + tempArray[i] = 1; + } + else { + tempArray[i] = 0; + } + } + } + + int scatter(int n, int *odata, const int *idata, const int *tempArray) { + //odata now contains the scan result + int elt_i, shouldInclude, newIdx; + int count = 0; + for (int i = 0; i < n; i++) { + shouldInclude = tempArray[i]; + elt_i = idata[i]; + if (shouldInclude) { + newIdx = odata[i]; + odata[newIdx] = elt_i; + ++count; + } + } + return count; + } + /** * CPU stream compaction using scan and scatter, like the parallel version. * * @returns the number of elements remaining after compaction. 
*/ int compactWithScan(int n, int *odata, const int *idata) { + // idata: orig int array, odata: output int array, n is len(int array) timer().startCpuTimer(); - // TODO + + //1: Malloc & Compute Temporary Array + int *tempArray = new int[n]; + computeTemporaryArray(n, tempArray, idata); + + //2: Exclusive Scan on tempArray + scan_notimer(n, odata, tempArray); + + //3: Scatter + int newlen = scatter(n, odata, idata, tempArray); timer().endCpuTimer(); - return -1; + return newlen; } } -} +} \ No newline at end of file diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index 2db346e..fc7ebe3 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -3,24 +3,141 @@ #include "common.h" #include "efficient.h" +/*! Block size used for CUDA kernel launch*/ +#define blockSize 512 +int *dev_idata; +int *dev_odata; +int *dev_mask; namespace StreamCompaction { - namespace Efficient { - using StreamCompaction::Common::PerformanceTimer; - PerformanceTimer& timer() - { - static PerformanceTimer timer; - return timer; - } + namespace Efficient { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + + int nextPowerOf2(int n) { + int p = 1; + if (n && !(n & (n - 1))) { + return n; + } + while (p < n) { + p <<= 1; + } + return p; + } + + __global__ void kernUpsweep(int n, int d, int *dev_odata, int incr, int twod) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + //also return if index is not a multiple of the incr + if (index >= n || (index) % incr != 0) { + return; + } + //if we reached here, index+1 must be a multiple of incr (2^(d+1)) + dev_odata[index + incr - 1] += dev_odata[index + twod - 1]; + dev_odata[n - 1] = 0; + } + __global__ void kernDownsweep(int n, int d, int *dev_odata, int incr, int twod) { + int index = 
(blockIdx.x * blockDim.x) + threadIdx.x; + //also return if index is not a multiple of the incr + if (index >= n || (index) % incr != 0) { + return; + } + //if we reached here, index+1 must be a multiple of incr (2^(d+1)) + int t = dev_odata[index + twod - 1]; + dev_odata[index + twod - 1] = dev_odata[index + incr - 1]; + dev_odata[index + incr - 1] += t; + } + + __global__ void kernMapToBoolean(int n, int *mask, int *idata) { + //dev_odata contains idata + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= n) { + return; + } + if (idata[index] != 0) { + mask[index] = 1; + } + else { + mask[index] = 0; + } + } + + __global__ void kernScatter(int n, int *mask, int *odata, int *idata) { + //odata now contains scan result + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= n){ + return; + } + int shouldInclude = mask[index]; + if (shouldInclude) { + int newIdx = odata[index]; + odata[newIdx] = idata[index]; + } + } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. 
*/ void scan(int n, int *odata, const int *idata) { + int malloc_size = nextPowerOf2(n); + //CUDA Malloc buffers + cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed!"); + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + int max_level = ilog2ceil(n); + int incr = 0; + int twod = 0; + timer().startGpuTimer(); - // TODO + //Copy idata into dev_odata + cudaMemcpy(dev_odata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_odata failed!"); + + //Upsweep + for (int d = 0; d < max_level; d++) { + incr = pow(2, d + 1); + twod = pow(2, d); + kernUpsweep<<>>(malloc_size, d, dev_odata, incr, twod); + } + + //Downsweep + for (int d = max_level-1; d >= 0; d--) { + incr = pow(2, d + 1); + twod = pow(2, d); + kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); + } + cudaMemcpy(odata, dev_odata, n * sizeof(int), cudaMemcpyDeviceToHost); timer().endGpuTimer(); + + //Free Memory + cudaFree(dev_odata); + } + + void scan_notimer(int n, int malloc_size) { + dim3 fullBlocksPerGrid((malloc_size + blockSize - 1) / blockSize); + int max_level = ilog2ceil(n); + int incr = 0; + int twod = 0; + + //Upsweep + for (int d = 0; d < max_level; d++) { + incr = pow(2, d + 1); + twod = pow(2, d); + kernUpsweep<<>>(malloc_size, d, dev_odata, incr, twod); + } + + //Downsweep + for (int d = max_level-1; d >= 0; d--) { + incr = pow(2, d + 1); + twod = pow(2, d); + kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); + } } + /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. @@ -31,10 +148,42 @@ namespace StreamCompaction { * @returns The number of elements remaining after compaction. 
*/ int compact(int n, int *odata, const int *idata) { + int malloc_size = nextPowerOf2(n); + //CUDA Malloc buffers + cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_mask, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc failed!"); + + //Memcpy idata into dev_odata for starters + cudaMemcpy(dev_idata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_idata failed!"); + cudaMemcpy(dev_odata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_odata failed!"); + timer().startGpuTimer(); - // TODO + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + //1: Compute mask (Temporary Array) + kernMapToBoolean<<>>(n, dev_mask, dev_odata); + + //2: Exclusive Scan on TempArray + scan_notimer(n, malloc_size); + + //2.5: Get Count from dev_odata + int tempcount[1]; + cudaMemcpy(&tempcount, dev_odata + n - 1, 1 * sizeof(int), cudaMemcpyDeviceToHost); + int count = idata[n - 1] == 0 ? tempcount[0] : tempcount[0] + 1; + + //3: Scatter + kernScatter<<>>(n, dev_mask, dev_odata, dev_idata); + timer().endGpuTimer(); - return -1; + + free(dev_mask); + free(dev_odata); + return count; } } } diff --git a/Project2-Stream-Compaction/stream_compaction/naive.cu b/Project2-Stream-Compaction/stream_compaction/naive.cu index 4308876..5434774 100644 --- a/Project2-Stream-Compaction/stream_compaction/naive.cu +++ b/Project2-Stream-Compaction/stream_compaction/naive.cu @@ -3,6 +3,11 @@ #include "common.h" #include "naive.h" +/*! 
Block size used for CUDA kernel launch*/ +#define blockSize 512 +int *dev_A; +int *dev_B; + namespace StreamCompaction { namespace Naive { using StreamCompaction::Common::PerformanceTimer; @@ -11,15 +16,68 @@ namespace StreamCompaction { static PerformanceTimer timer; return timer; } - // TODO: __global__ + + __global__ void kernNaiveScan(int n, int curr_level, int* devA, int* devB) { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index >= n) { + return; + } + int offset = (int)powf(2, curr_level - 1); + if (index >= offset) { + devB[index] = devA[index - offset] + devA[index]; + } + else { + devB[index] = devA[index]; + } + } + + int nextPowerOf2(int n) { + int p = 1; + if (n && !(n & (n - 1))) { + return n; + } + while (p < n) { + p <<= 1; + } + return p; + } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { + int malloc_size = nextPowerOf2(n); + //CUDA Malloc buffers + cudaMalloc((void**)&dev_A, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_A failed!"); + cudaMalloc((void**)&dev_B, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_A failed!"); + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + int max_level = ilog2ceil(n); + timer().startGpuTimer(); - // TODO + //Copy idata into dev_A + cudaMemcpy(dev_A, idata, n * sizeof(int), cudaMemcpyHostToDevice); + + //loop over each level + for (int curr_level = 1; curr_level <= max_level; curr_level++) { + //Launch Kernel (thereby updating dev_B) + kernNaiveScan<<>>(n, curr_level, dev_A, dev_B); + + //Copy dev_B's updated data into dev_A + cudaMemcpy(dev_A, dev_B, n * sizeof(int), cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy dev_A to dev_B failed!"); + } + //Exclusive Scan so shift right when copying back + cudaMemcpy(odata+1, dev_A, (n-1) * sizeof(int), cudaMemcpyDeviceToHost); + odata[0] = 0; + checkCUDAError("cudaMemcpy dev_A to out failed!"); timer().endGpuTimer(); + + //Free 
Memory + cudaFree(dev_A); + cudaFree(dev_B); } } } From 06099f0151602b8c06ef202bad0bb473c519e2d6 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 18:06:15 -0700 Subject: [PATCH 14/48] All done time to submit --- Project2-Stream-Compaction/src/main.cpp | 16 +-- .../stream_compaction/efficient.cu | 98 +++++++++++++------ 2 files changed, 73 insertions(+), 41 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 23a95b9..08f7f56 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1<<3; // feel free to change the size of array +const int SIZE = 1<<8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; @@ -30,10 +30,6 @@ int main(int argc, char* argv[]) { genArray(SIZE, a, 50); // Leave a 0 at the end to test that edge case a[SIZE - 1] = 0; - for (int p = 0; p < SIZE; p++) { - a[p] = p; - } - printArray(SIZE, a, true); // initialize b using StreamCompaction::CPU::scan you implement @@ -84,7 +80,6 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - /* zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); @@ -108,9 +103,8 @@ int main(int argc, char* argv[]) { // Compaction tests genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case - a[SIZE - 1] = 0; + a[SIZE - 1] = 0; printArray(SIZE, a, true); - int count, expectedCount, expectedNPOT; // initialize b using StreamCompaction::CPU::compactWithoutScan you implement @@ -142,18 +136,16 @@ int main(int argc, char* argv[]) { printDesc("work-efficient compact, power-of-two"); count = StreamCompaction::Efficient::compact(SIZE, c, a); 
printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedCount, b, c); - /* zeroArray(SIZE, c); printDesc("work-efficient compact, non-power-of-two"); count = StreamCompaction::Efficient::compact(NPOT, c, a); printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); - //printArray(count, c, true); + printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - */ system("pause"); // stop Win32 console from closing on exit delete[] a; delete[] b; diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index fc7ebe3..2eeabc7 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -5,9 +5,6 @@ /*! Block size used for CUDA kernel launch*/ #define blockSize 512 -int *dev_idata; -int *dev_odata; -int *dev_mask; namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; @@ -28,26 +25,26 @@ namespace StreamCompaction { return p; } - __global__ void kernUpsweep(int n, int d, int *dev_odata, int incr, int twod) { + __global__ void kernUpsweep(int n, int d, int *odata, int *odata2, int incr, int twod) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; //also return if index is not a multiple of the incr if (index >= n || (index) % incr != 0) { return; } //if we reached here, index+1 must be a multiple of incr (2^(d+1)) - dev_odata[index + incr - 1] += dev_odata[index + twod - 1]; - dev_odata[n - 1] = 0; + odata[index + incr - 1] += odata2[index + twod - 1]; + odata[n - 1] = 0; } - __global__ void kernDownsweep(int n, int d, int *dev_odata, int incr, int twod) { + __global__ void kernDownsweep(int n, int d, int *odata, int *odata2, int incr, int twod) { int 
index = (blockIdx.x * blockDim.x) + threadIdx.x; //also return if index is not a multiple of the incr if (index >= n || (index) % incr != 0) { return; } //if we reached here, index+1 must be a multiple of incr (2^(d+1)) - int t = dev_odata[index + twod - 1]; - dev_odata[index + twod - 1] = dev_odata[index + incr - 1]; - dev_odata[index + incr - 1] += t; + int t = odata[index + twod - 1]; + odata[index + twod - 1] = odata2[index + incr - 1]; + odata[index + incr - 1] += t; } __global__ void kernMapToBoolean(int n, int *mask, int *idata) { @@ -64,7 +61,7 @@ namespace StreamCompaction { } } - __global__ void kernScatter(int n, int *mask, int *odata, int *idata) { + __global__ void kernScatter(int n, int *mask, int *odata, int *odata2, int *idata) { //odata now contains scan result int index = (blockIdx.x * blockDim.x) + threadIdx.x; if (index >= n){ @@ -72,7 +69,7 @@ namespace StreamCompaction { } int shouldInclude = mask[index]; if (shouldInclude) { - int newIdx = odata[index]; + int newIdx = odata2[index]; odata[newIdx] = idata[index]; } } @@ -83,8 +80,12 @@ namespace StreamCompaction { void scan(int n, int *odata, const int *idata) { int malloc_size = nextPowerOf2(n); //CUDA Malloc buffers + int *dev_odata; + int *dev_odata2; cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_odata2, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata2 failed!"); dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); int max_level = ilog2ceil(n); @@ -95,28 +96,38 @@ namespace StreamCompaction { //Copy idata into dev_odata cudaMemcpy(dev_odata, idata, n * sizeof(int), cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy dev_odata failed!"); + cudaMemcpy(dev_odata2, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy dev_odata failed!"); //Upsweep for (int d = 0; d < max_level; d++) { incr = pow(2, d + 1); twod = pow(2, d); - kernUpsweep<<>>(malloc_size, 
d, dev_odata, incr, twod); + kernUpsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); + + //Ping Pong the buffers + cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); } //Downsweep for (int d = max_level-1; d >= 0; d--) { incr = pow(2, d + 1); twod = pow(2, d); - kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); + kernDownsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); + + //Ping Pong the buffers + cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); } cudaMemcpy(odata, dev_odata, n * sizeof(int), cudaMemcpyDeviceToHost); timer().endGpuTimer(); //Free Memory cudaFree(dev_odata); + cudaFree(dev_odata2); } - void scan_notimer(int n, int malloc_size) { + void scan_notimer(int n, int malloc_size, int *dev_odata, int *dev_odata2) { + //Odata contains mask info dim3 fullBlocksPerGrid((malloc_size + blockSize - 1) / blockSize); int max_level = ilog2ceil(n); int incr = 0; @@ -126,18 +137,36 @@ namespace StreamCompaction { for (int d = 0; d < max_level; d++) { incr = pow(2, d + 1); twod = pow(2, d); - kernUpsweep<<>>(malloc_size, d, dev_odata, incr, twod); + kernUpsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); + + //Ping Pong the buffers + cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); } //Downsweep for (int d = max_level-1; d >= 0; d--) { incr = pow(2, d + 1); twod = pow(2, d); - kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); + kernDownsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); + + //Ping Pong the buffers + cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); } } + void printArray(int n, int *a, bool abridged = false) { + printf(" [ "); + for (int i = 0; i < n; i++) { + if (abridged && i + 2 == 15 && n > 16) { + i = n - 2; + printf("... 
"); + } + printf("%3d ", a[i]); + } + printf("]\n"); +} + /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. @@ -150,39 +179,50 @@ namespace StreamCompaction { int compact(int n, int *odata, const int *idata) { int malloc_size = nextPowerOf2(n); //CUDA Malloc buffers - cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); + int *dev_odata; + int *dev_odata2; + int *dev_idata; + int *dev_mask; + cudaMalloc((void**)&dev_odata, (malloc_size+1) * sizeof(int)); checkCUDAError("cudaMalloc dev_odata failed!"); - cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); + cudaMalloc((void**)&dev_odata2, (malloc_size+1) * sizeof(int)); checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_idata, malloc_size * sizeof(int)); + checkCUDAError("cudaMalloc dev_in failed!"); cudaMalloc((void**)&dev_mask, malloc_size * sizeof(int)); - checkCUDAError("cudaMalloc failed!"); + checkCUDAError("cudaMalloc dev_temp failed!"); //Memcpy idata into dev_odata for starters cudaMemcpy(dev_idata, idata, n * sizeof(int), cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy dev_idata failed!"); - cudaMemcpy(dev_odata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(dev_odata, dev_idata, n * sizeof(int), cudaMemcpyDeviceToDevice); checkCUDAError("cudaMemcpy dev_odata failed!"); timer().startGpuTimer(); dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); //1: Compute mask (Temporary Array) - kernMapToBoolean<<>>(n, dev_mask, dev_odata); + kernMapToBoolean<<>>(n, dev_odata, dev_idata); //2: Exclusive Scan on TempArray - scan_notimer(n, malloc_size); + cudaMemcpy(dev_mask, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_odata2, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy dev_odata failed!"); + scan_notimer(n, malloc_size, dev_odata, dev_odata2); - //2.5: Get Count from dev_odata + //2.5: Get Count from dev_mask int tempcount[1]; 
cudaMemcpy(&tempcount, dev_odata + n - 1, 1 * sizeof(int), cudaMemcpyDeviceToHost); int count = idata[n - 1] == 0 ? tempcount[0] : tempcount[0] + 1; - //3: Scatter - kernScatter<<>>(n, dev_mask, dev_odata, dev_idata); - + //3: Scatter (dev_odata now contains scan info) + kernScatter<<>>(n, dev_mask, dev_odata, dev_odata2, dev_idata); + cudaMemcpy(odata, dev_odata, (count) * sizeof(int), cudaMemcpyDeviceToHost); timer().endGpuTimer(); - free(dev_mask); - free(dev_odata); + cudaFree(dev_mask); + cudaFree(dev_odata); + cudaFree(dev_odata2); + cudaFree(dev_idata); return count; } } From 73a0612517512747794ae6fa5e6c7b69c888fde8 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 18:33:57 -0700 Subject: [PATCH 15/48] my scan is better than thrust? --- Project2-Stream-Compaction/src/main.cpp | 3 +- .../stream_compaction/efficient.cu | 54 ++++++------------- .../stream_compaction/thrust.cu | 23 ++++++-- 3 files changed, 36 insertions(+), 44 deletions(-) diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 08f7f56..4090eca 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -80,7 +80,7 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Efficient::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - /* + zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); StreamCompaction::Thrust::scan(SIZE, c, a); @@ -94,7 +94,6 @@ int main(int argc, char* argv[]) { printElapsedTime(StreamCompaction::Thrust::timer().getGpuElapsedTimeForPreviousOperation(), "(CUDA Measured)"); //printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - */ printf("\n"); printf("*****************************\n"); diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index 2eeabc7..de2bb61 100644 --- 
a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -4,7 +4,7 @@ #include "efficient.h" /*! Block size used for CUDA kernel launch*/ -#define blockSize 512 +#define blockSize 1024 namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; @@ -25,17 +25,17 @@ namespace StreamCompaction { return p; } - __global__ void kernUpsweep(int n, int d, int *odata, int *odata2, int incr, int twod) { + __global__ void kernUpsweep(int n, int d, int *odata, int incr, int twod) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; //also return if index is not a multiple of the incr if (index >= n || (index) % incr != 0) { return; } //if we reached here, index+1 must be a multiple of incr (2^(d+1)) - odata[index + incr - 1] += odata2[index + twod - 1]; + odata[index + incr - 1] += odata[index + twod - 1]; odata[n - 1] = 0; } - __global__ void kernDownsweep(int n, int d, int *odata, int *odata2, int incr, int twod) { + __global__ void kernDownsweep(int n, int d, int *odata, int incr, int twod) { int index = (blockIdx.x * blockDim.x) + threadIdx.x; //also return if index is not a multiple of the incr if (index >= n || (index) % incr != 0) { @@ -43,7 +43,7 @@ namespace StreamCompaction { } //if we reached here, index+1 must be a multiple of incr (2^(d+1)) int t = odata[index + twod - 1]; - odata[index + twod - 1] = odata2[index + incr - 1]; + odata[index + twod - 1] = odata[index + incr - 1]; odata[index + incr - 1] += t; } @@ -81,52 +81,38 @@ namespace StreamCompaction { int malloc_size = nextPowerOf2(n); //CUDA Malloc buffers int *dev_odata; - int *dev_odata2; cudaMalloc((void**)&dev_odata, malloc_size * sizeof(int)); checkCUDAError("cudaMalloc dev_odata failed!"); - cudaMalloc((void**)&dev_odata2, malloc_size * sizeof(int)); - checkCUDAError("cudaMalloc dev_odata2 failed!"); dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); int max_level = ilog2ceil(n); int 
incr = 0; int twod = 0; - - timer().startGpuTimer(); //Copy idata into dev_odata cudaMemcpy(dev_odata, idata, n * sizeof(int), cudaMemcpyHostToDevice); checkCUDAError("cudaMemcpy dev_odata failed!"); - cudaMemcpy(dev_odata2, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); - checkCUDAError("cudaMemcpy dev_odata failed!"); + timer().startGpuTimer(); //Upsweep for (int d = 0; d < max_level; d++) { incr = pow(2, d + 1); twod = pow(2, d); - kernUpsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); - - //Ping Pong the buffers - cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); + kernUpsweep<<>>(malloc_size, d, dev_odata, incr, twod); } //Downsweep for (int d = max_level-1; d >= 0; d--) { incr = pow(2, d + 1); twod = pow(2, d); - kernDownsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); - - //Ping Pong the buffers - cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); + kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); } - cudaMemcpy(odata, dev_odata, n * sizeof(int), cudaMemcpyDeviceToHost); timer().endGpuTimer(); - + cudaMemcpy(odata, dev_odata, n * sizeof(int), cudaMemcpyDeviceToHost); //Free Memory cudaFree(dev_odata); - cudaFree(dev_odata2); } - void scan_notimer(int n, int malloc_size, int *dev_odata, int *dev_odata2) { + void scan_notimer(int n, int malloc_size, int *dev_odata) { //Odata contains mask info dim3 fullBlocksPerGrid((malloc_size + blockSize - 1) / blockSize); int max_level = ilog2ceil(n); @@ -137,20 +123,14 @@ namespace StreamCompaction { for (int d = 0; d < max_level; d++) { incr = pow(2, d + 1); twod = pow(2, d); - kernUpsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); - - //Ping Pong the buffers - cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); + kernUpsweep<<>>(malloc_size, d, dev_odata, incr, twod); } //Downsweep for (int d = max_level-1; d >= 0; d--) { incr = pow(2, d + 1); twod = 
pow(2, d); - kernDownsweep<<>>(malloc_size, d, dev_odata, dev_odata2, incr, twod); - - //Ping Pong the buffers - cudaMemcpy(dev_odata2, dev_odata, malloc_size * sizeof(int), cudaMemcpyDeviceToDevice); + kernDownsweep<<>>(malloc_size, d, dev_odata, incr, twod); } } @@ -197,17 +177,15 @@ namespace StreamCompaction { checkCUDAError("cudaMemcpy dev_idata failed!"); cudaMemcpy(dev_odata, dev_idata, n * sizeof(int), cudaMemcpyDeviceToDevice); checkCUDAError("cudaMemcpy dev_odata failed!"); - - timer().startGpuTimer(); dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + timer().startGpuTimer(); //1: Compute mask (Temporary Array) kernMapToBoolean<<>>(n, dev_odata, dev_idata); //2: Exclusive Scan on TempArray cudaMemcpy(dev_mask, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); - cudaMemcpy(dev_odata2, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); checkCUDAError("cudaMemcpy dev_odata failed!"); - scan_notimer(n, malloc_size, dev_odata, dev_odata2); + scan_notimer(n, malloc_size, dev_odata); //2.5: Get Count from dev_mask int tempcount[1]; @@ -215,9 +193,11 @@ namespace StreamCompaction { int count = idata[n - 1] == 0 ? 
tempcount[0] : tempcount[0] + 1; //3: Scatter (dev_odata now contains scan info) + cudaMemcpy(dev_odata2, dev_odata, n * sizeof(int), cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy dev_odata failed!"); kernScatter<<>>(n, dev_mask, dev_odata, dev_odata2, dev_idata); - cudaMemcpy(odata, dev_odata, (count) * sizeof(int), cudaMemcpyDeviceToHost); timer().endGpuTimer(); + cudaMemcpy(odata, dev_odata, (count) * sizeof(int), cudaMemcpyDeviceToHost); cudaFree(dev_mask); cudaFree(dev_odata); diff --git a/Project2-Stream-Compaction/stream_compaction/thrust.cu b/Project2-Stream-Compaction/stream_compaction/thrust.cu index 1def45e..0c607be 100644 --- a/Project2-Stream-Compaction/stream_compaction/thrust.cu +++ b/Project2-Stream-Compaction/stream_compaction/thrust.cu @@ -18,11 +18,24 @@ namespace StreamCompaction { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); - timer().endGpuTimer(); + int * dev_idata; + int * dev_odata; + cudaMalloc((void**)&dev_odata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed!"); + cudaMalloc((void**)&dev_idata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_idata failed!"); + + cudaMemcpy(dev_idata, idata, n * sizeof(int), cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy dev_idata failed!"); + thrust::device_ptr input(dev_idata); + thrust::device_ptr output(dev_odata); + + timer().startGpuTimer(); + thrust::exclusive_scan(input, input + n, output); + timer().endGpuTimer(); + cudaMemcpy(odata, dev_odata, n * sizeof(int), cudaMemcpyDeviceToHost); + cudaFree(dev_idata); + cudaFree(dev_odata); } } } From b1ab20b2fe321beeeef7b4f1f081f16c6b73d080 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 18:55:05 -0700 Subject: [PATCH 16/48] Update 
README.md --- Project2-Stream-Compaction/README.md | 83 +++++++++++++++++++++++++--- 1 file changed, 76 insertions(+), 7 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 0e38ddb..8395ba3 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -1,14 +1,83 @@ -CUDA Stream Compaction -====================== - -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** +Project 2 - STREAM COMPACTION +==================== +**University of Pennsylvania, CIS 565: GPU Programming and Architecture** * (TODO) YOUR NAME HERE * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. * Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) -### (TODO: Your README) +Dhruv Karthik: [LinkedIn](https://www.linkedin.com/in/dhruv_karthik/) + +Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GM, GTX 2070 - Compute Capability 7.5 +____________________________________________________________________________________ +![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) +____________________________________________________________________________________ +## Performance Analysis + +## Questions +**Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** +Main Point: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across . + +Copying back and forth (Device to Device & Device to Host) can seriously increase the execution time of the program. The CPU Implementation takes wins likely because of this. The Work Efficient scan is the faster of the GPU scans, but still requires that we copy. -Include analysis, etc. 
(Remember, this is public, so don't put -anything here that you don't want to share with the world.) +## Output +```bash +**************** +** SCAN TESTS ** +**************** + [ 40 42 40 23 36 45 21 8 44 38 1 3 25 ... 15 0 ] +==== cpu scan, power-of-two ==== + elapsed time: 0.000642ms (std::chrono Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6248 6263 ] +==== cpu scan, non-power-of-two ==== + elapsed time: 0.000321ms (std::chrono Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6141 6177 ] + passed +==== naive scan, power-of-two ==== + elapsed time: 0.315392ms (CUDA Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6248 6263 ] + passed +==== naive scan, non-power-of-two ==== + elapsed time: 0.2752ms (CUDA Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6141 6177 ] + passed +==== work-efficient scan, power-of-two ==== + elapsed time: 0.08576ms (CUDA Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6248 6263 ] + passed +==== work-efficient scan, non-power-of-two ==== + elapsed time: 0.08608ms (CUDA Measured) + [ 0 40 82 122 145 181 226 247 255 299 337 338 341 ... 6141 6177 ] + passed +==== thrust scan, power-of-two ==== + elapsed time: 0.105824ms (CUDA Measured) + passed +==== thrust scan, non-power-of-two ==== + elapsed time: 0.107616ms (CUDA Measured) + passed +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 2 1 0 0 3 0 0 1 3 2 3 3 0 ... 0 0 ] +==== cpu compact without scan, power-of-two ==== + elapsed time: 0.000641ms (std::chrono Measured) + [ 2 1 3 1 3 2 3 3 1 1 2 3 2 ... 2 2 ] + passed +==== cpu compact without scan, non-power-of-two ==== + elapsed time: 0.000963ms (std::chrono Measured) + [ 2 1 3 1 3 2 3 3 1 1 2 3 2 ... 1 2 ] + passed +==== cpu compact with scan ==== + elapsed time: 0.002246ms (std::chrono Measured) + [ 2 1 3 1 3 2 3 3 1 1 2 3 2 ... 
2 2 ] + passed +==== work-efficient compact, power-of-two ==== + elapsed time: 0.19456ms (CUDA Measured) + [ 2 1 3 1 3 2 3 3 1 1 2 3 2 ... 2 2 ] + passed +==== work-efficient compact, non-power-of-two ==== + elapsed time: 0.196608ms (CUDA Measured) + [ 2 1 3 1 3 2 3 3 1 1 2 3 2 ... 1 2 ] + passed +``` From f906f8fd11baa1f2e3d2b48fc6712d0445488fee Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 18:55:44 -0700 Subject: [PATCH 17/48] Update README.md --- Project2-Stream-Compaction/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 8395ba3..410445a 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -1,11 +1,6 @@ Project 2 - STREAM COMPACTION ==================== **University of Pennsylvania, CIS 565: GPU Programming and Architecture** - -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) - Dhruv Karthik: [LinkedIn](https://www.linkedin.com/in/dhruv_karthik/) Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GM, GTX 2070 - Compute Capability 7.5 From c125853ac4c58773bf7734b695805f2b35bef032 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 19:10:43 -0700 Subject: [PATCH 18/48] more done --- Project2-Stream-Compaction/img/output.PNG | Bin 0 -> 50868 bytes Project2-Stream-Compaction/src/main.cpp | 2 +- .../stream_compaction/efficient.cu | 2 +- .../stream_compaction/naive.cu | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 Project2-Stream-Compaction/img/output.PNG diff --git a/Project2-Stream-Compaction/img/output.PNG b/Project2-Stream-Compaction/img/output.PNG new file mode 100644 index 0000000000000000000000000000000000000000..abeb9d54d58106a375274fcaf1fde342ed3aed11 GIT binary patch literal 50868 
zcmdqJcT^K?yZ?(75Tz;>1f+>Hu>nep#6lAhP>>=KP*DOzMWlwPfQVF;q9DWqB2q#} zTB6bf#E3#d4-kqFAcT;RK4-w^dEfo)v-dvxx7Jzftabi?Br}tlJNMk>`dr`dJ+bG` zns3>(Ym<7VH0DE(&Q1SQO)8L2o-exvtLPDiU zVmyxx;AhdB7R~`eLgI}pe`{J1@2?06wOCr7G&>*cKF9BTK{m#pj>{IpdSlm3yJY%gd%67x^YxAsg;Pzgzstx?j8d-R+Nk6V_JbPYq%t$3QtRT9>0vsRh~fA z?FM&p`UamoZDgW=xQj8`e{bs*(WtSuGrFz;3`~H>{AitgYjVsDuDHcc5Yz!)_LZWb z*O>fk^=(#G@>=kid-{KsU{4PCNob>)#Y*uuKUm$lW4+Q!bsP|ZuGHma$*8U1%cnj6 zx#(LV2N#D+BEWqZFDd)Qw^g@45h9kV9Or2y6cB+!36{AyplEz9m+HgDQAvs#T(*F} zOw!z#MoGdj`cW`k62Rw00Q6IhKOYpb(fjTW0Z-gpUZbKG<2NU{ z+-|M<b^=n!`K#QiZJ*U z{mdlh_{}2Zk@H&17C|SpOG`wQ7`HAZ7R#8fJ+DYDx~gn3X{fu^9_km7VGk6|S?5IH zT~oH%S=q!i$?D+5Y0bXWC9*L7-JokBH3)a z)zI>2%6v*wEj??OjkKqgg(Xr-D+xo1N_jPvhp86Pc-^f49zNfA6{;yB6WDrc{po4f zu+hW|cZ05}G#ygf<_=Lv-;^I75$mXQ!AAl-ba_+_s<}y?>;JTPdsF0xXX|6sOqW#F z-G%z>?}&C5yP%Zo`-@rm-S)uL&#u2t9kkkPhUNdurt!}FdBg zU=)}9bV{ymjg|y11vT-h2XFL-hgjVM$uzRR-TO`$>hk|_;~uoKXv3%HDE9D4_+=^% zU}?yJ^|X)vr(s{4yZJe3kz{LOg8f$JO(TJg8~xziGexF}>(|s4fQrMA%y)&26(0ZS zBN*oCd&y0h-9^r@E^oi)o>TO5&65R1xRflD2<2r|xcoEtnU5G*do>TPn^8TVSPMbl zPpo$8#5MI$5Vs~HDi-lt%cO!E&LpiVV3t0$t;TrDD=NkPx#Ss5yUEYnC}$tDVyTaP z$kdr#v5b9Aco?};HxcE22+xJ--wv*po#M|XIo2~!SCtFckfdlQDQ3hX-j{TkKguR) zq4qk4qfG$EVi#1x1lrrP8AyeWrKS*;+u2k4%?w2dO49!6nX}UNnL+oQ-?+K!itQ8C z!NNn*lHVtwRRYmjUoqLZcSmqHk;BKmeWUCFjnbK3G1CywLl`#fk?^z+shkLNSi~*N zhAd0ACel%N2SWtQ?PxO%fwcE`T&fn4M8}txi~*@!s+AQbY=W20XXen&sS{2#DX6Ax z{3PY-nkhJaj^wR+1QBkt`FBsE_e};*uZS+zHs6#K5(wB?`1zF;C;TiIh-&pg*5QGJ zvhJwlhwSfUmoH3_Y`~l_wJ#(beGE`2b^%@vLhx_2fS0@1IsK@`VqT&uEXPon(p9FC zdJ~fkJ?KrUS5?GP{nchp{i@~!!c>CEd7<8q_KkvfFBH6c9`Cre!o+I=+pYFR6wER5 zaX1awNe;bO%53Kwl$o0MHIdrr)X~R`>K}sAcJtrgAqESLlomt(Db32kGHzI{XU@`n|YJf8N=38B~&#)Kiqk~*M}+k(>+P?r1>?p zKQ)1Pq5xG<(wDM5TrI$YAI;tuMkl_8NbShS?+NX1dgD#{xtwTv@NM*ymQ0*C+8-Ro zQE_{69^+1>h&Mdz&}qDF74tG#ye#3|Ca;#rer-i5`_#yqYv^bNU?;QZCf{qYtuGZ{ z^+@qbcCGEDpc`n$3o)?*8&N#gr9t5_v^^8`7OCbrv?w@QH&`YrB-9@qXJnryxzNrq zyOt&)lDhBRiv3`nG$sZW#eVa58f&}Io@CK4~<}GqhGoUGZ2 z>%vNx`d1lr|GVIsxF?T;B4W0N8mYQ{V<30TEN#|RylUbVT*jVq 
z|9XBrDPAOd>Z{~U`qz+~hIy`Y>xAy% zS_ecllpd!}-^;KRb~5~W$2sDhi-1J2m56X;x1_zIkO1r&M`IvLtARkV&_5!^&|^UVY#PA{MW=M z77w)tAjJjtq=X^=WBG%v2a9V#L#K?unBh7BZbwxve%v^qcd6Zq z3+t0{w|4u6M2va(W2R(h@@lOC3ab`{LDo7d_bLbG9kWsd-W{DEA{}!7P?u6bt86&d z7BOTB!~MXHKs1(TJ77fSt8T`k>Zh#W*knzTT`v|c?IY*@*5J(xk+Oqc%DqY6JzIz= z$ZH{LQYqIhB?&yn#e6q)ssfN$60gX4DebufPNhq`a~mf-1gGCP(ur6YM~k&r6?vhU z;wnfttYp=$BY0qLl;XDH@coKh(_Z=EE0Dh33D*qq=VkP0q^L^qzc}e2DwpAoGPte5c&Awn0b5 z+C-$|&kt%3r1M1c#eqc07Cfgs+{yLQlU;0aKyrrlX|0zmcN+;vj(QKq6?DO~Uoe>$yGYeD^4ZC^%fc`t8 z;AY5klw)zYas)}8{m5S?9R971jw_a%@umd|`!FX{W~oy4D%`hG-eH=gLe$+Iu3Sxz zJ`A~)LV}}(Ft736i&tP;rAZ(ry zUOBD2(9+SG--DOAU~$NYS054UvS{&~?qsQxI;CI93bK5abyw^)s^|MIgmb#~jZN-c z?{s3gpZuX)Hgk=ynt@{kLwB1b8^@uiwyw)y$EjRAP$lIA4rcq;GvWE^6Y?Y}oBr1}V08h?e6rL0o z3Jb252x^h>6$%1LeZefpNPTwOM>$$?2A`($DGOxz@ExOGvShDq_X-{zdzrK}V#1bL zW%-d!|HS^9)a3r~h8MU|FtEAztWA;B#qFHrisp;qIJ*=ygUXU!v-3QcP?IdKx0)YN za#&Z9{g}5ao>2aHhL1K8*yoJ^36B&jUV95BWxC~Nb!nRc1P3zvEGvj1pJ9-pQA&b=jRuZi`qYU)>36ctzD&EbcKH={yR&-Mw zeUGeU7EwqKmU;#hl=Ng|J;k%He$Mu_o>`{#LgMGY(Hu~QM;~BPw;YP#5WEJ@Am!8| z0z-W0eyHd^3NTAMk@Xm8Ry5a#`SE0d>RuT6J&Igec~)14;zUO!E)ivbVb(B5;S$i# zVsnZD~b zIZQxhpj#iI%%7Ua{$%L^@IwBNNIh&d)|>!QS0werJ+izNgD0k4B!)u??MWQi@0~aU zNaf`O*kjyBcl-d#X@u?is}6!LH8r23+|XSj5aTOOS=fwcs&8ytO4Jt1R%tTzy50V4 zT&qO_YeUA%z1^x!Xt|NACKbX*AB^Y(zLbmq{K+a3AZTcF42n!?t6K0kZn;#ZhZuOE zhn+%ob{StzpcYZf39w!^@#3%FNx_|3wnP_Nf9OOEA95gqBDnr}2I5X9Q+INyJxlYX zC;X5r$i#l)&~Pk7BC30j^+?nPi@2>9O2w;GemA$RV{9Xf9kgsZUdYg@N|R17-~Cph z$sO>HhtYA8GN+boMxxl&EL<~6p+R61YfWuXxqA_;CtGdH@?=%ll9l%3Owmo@)hTSg zg-{1VC-PxQ2j+cofs(-!U!(vs6vflliEBa-x0~S9WfuWCLB_p*rW^a2Q8~iF>@rvU;c)b9>gG9%j z!oG0T=*0B^*?{qBp4=P_Q+b(58U=nte4@q@STsV=9lkZWHKx4KY!hrxdSKBVk)?CiC&Btjpt51rw()z<7hEa;8X2E&McV=Inm)!t)a{&8 zI0y#rs=0&-jm!?YB468z88^fXs#IV^lnZ$80X-A)@}J9dAr*3iD5tJkl=<@1pXvxp z*pAX`QWVKIgC5XmjUlS{dFm{_Am<(L{`^Eeo>lAT)xA$m83UBJ5~B=LZK}d64vnDT zU?&PAnt0aT0w!tOnV!w&$J~L8bHv|uL1%1;tnZw}Lew%(3qG3~7XO|42+(U-DS_-D zEtmSmI4MDH%FYK>QU9fP$L}O*Tnz4xXVcKJ-f1L>K&$C 
z;5JZ&NxIpM4dr4P%gGmi`z5VX-SgvrlA?5>Vxpsirx_5lyF)uIKkvckkS4pV=C5eeziKH(WCq&Ig+!RC3-}lu96wk5q5T zh@;sDE=ep_`eBka<*&tNARkl^$uTD&<+jz!EtJm`NGjaJgN^9MRw9we$K>q zF~ND}IOYUO`G7szY$=6kP425+YItk=?Yc(}ZIarTHvn>NfGG3>%hh1|$FZ8hL)v+F zzKTrm=@ZF4aLssM=c1I-g=0J5Oy}P;viGa08VqQnrbKbA$M!8$_Ra`TaXRPDU9sZA_~Vko9;RP=6Qz1 zn!xJDa@$8SS%xR0CSHXkBGu{zMWuH#Ww}hctHj;%26x4v@O&-44N)P9)OupZiaj{I z=y!RWw8xxg(2oyHBc>m>B>H?2aMI2qgpQ~HlcCvo*TE-NG0^G6q@edNCR#>+S7H&vno^)$5PDl~0M-{W<$zh-k`NLBxy-r$7hpZd;3^IbpMJKa@(jZnQ9f^J|>^G`-vI15+qw&IL*W5}Naw4Y1~2Ml3@lU2ECkynph z?~iH^-P1=-xf$K)|O=n{mYruEy z=szKd)5;DZp})0;uYOV8lU-|svF`+$p;!UQisPz`trho{#(uncJ3gxvVzpopOl=j= zw8|bLlLFU#6sxXN30w++U_8fPC3~-#_O_lFykybry|Z(3@9#>8Jys+0EB__LyUiu0 z89=Z)zdAluJbf~T^-=b}fJVW>a3oF)>XUppsyj;*dNrG>eAJN821ix6dBtwHfp%{- zxAc|Bf8LY_C!vg9#3`QkD#_D5E88A{D>Rxcj79#P8ol_Fftpa-$GG;3_kBPLNX`M9|q zNVZ1YXcm&fDW*Fx4!viE&7CG>3!lYY9}((@Uw z{N!au<^l+0Q?c(hOsm+M9!txb7J5>%!3g@MZ!ZMz?m=_N8@d%9SQ53)dA4^i31qIf z6E|L-)FvsYW-cZdiusk765yo~ZGe0D(EW@A9VT(iAgOw!T>E=Ht01*iuSBAEOyI~kW+!vdc2YmwN9 z*T~N(`*{#-tHxB60vn7)Z>qa1=d=fHWyW0cYlJVIJ|bhLf9=eA=IBhHcbb#5S!~*R zk4{)pYJ1?~JdG;S!Fr9?PtM;9JL+i)qOj|2;6iC-z^1 zV|?4Q{*<#4li!IaFVm6%j920;;{S*i6Gr>I)rwq{_ucw~Xdb*mNXL{~J=477*U^kU zY4JxNA3Vpj-d#{nNCO-i?7_-RV=T_a||(!aH&3DT9H z&G&U(q+R1Z6gsqs6>v^GKrGUGoo7l^(xN6D{2k`iG@pJC#S zhPgU_!TV>Om9?rW+skccnNOZt`(&!k$lmoj@*So=m-53tQ|;I> zCSvAJzfOuTiJ&2-f#E+{ywn$UwyEIKv4X78J^NC^1l28yG+N)qpp9P`r@002A0?19 z<_1#m#OLK3@v+8C^XQb4IH{hhT)-tP>8YQ~x!G{t>p+}|weyLHZ?gl1=%z>Yj)CFe zz0{k;|27AZ5zcOce9U{VIm->XKs zP6oY0ot~qQ(Rb1co?O&ywiS))KGXZ?hR4qbrhRGMRB|b5#}F9U3hvxo57Uyn!doh8 z-SHe5tNmYaVne36gB%VVSDD14Go)a@qF>e>oE{!ph0kch8LaRb5vL!46*OSNEZ^i* zq`BApprCB1EJ8S)-*w^!??~b{Fv2GF1ap_KM;Onby?gU$V{{9i)xdB=OmfB}IfPC4 zsLSYqqhFrxR=s}y6&UQNUi1VQTYCPGqW>e#>4pE7N%0&EU&YRv2FsGZFh;-53oV7; zPtN$dUDtoivdeMn9oc4M$P&i@o9!z`-RL!_=1Q;9SwS@7?#^`t5TZ@t{OBxHf?k5)5wU*pPSG|@kyAu3 zu#Q|kpCnr-AnQQ{*`*+t;RQOK)H8r_qiMo+v^jY@82L*dReJtYxiu&?xAFbWS+_^!tQhg();w^(D&-kV=|t~}BhB*`6k7x^Dik!Vp?1Qv7IHZQW4jyn$a3$-_uG0`P*M0zwbb9F 
zxRCD9`sN7W2!sjfJ7}2H@Y6C+`%p2!3GN!4x(m70mDI)1O;9A!tT&4|)bQSyxOijg z(=Vgc($he`hjbZ51P_%!T)OznO{6Y<&;}DOF@w}6IOcCxf?Go(8={l+lK zzkU8&R@}YMk!=TNJ6foHibcN#VNBF@gO5P*8*jjcZZrTfpL1=0$y#c6l|t>w*(mxs zM?WGM`BW$LSsvUw5xw?|u(?7BjS)O%m7@QE2=tNR(5!Dj=HgOU`#qqD=E|d9T7ERw z!q04l^~P;eoCxN_p_krLhY#vqA{$YR$^=7nOkt_h{6RyUih#J3NGgZ8+-yz9iqVbr z{a>2rgEP#oB5w872yfQZPG$&=UC7$zDrFz3ovFKiu_^ZM#rvU57EL~T2ByXjTsKvL zcwS<+X|6$lSAbwLxu_{_>~O+(0*sEb=I+8TP`?Yyi{}4f#lwsKKmP|*jJ1Cq#0S$p zWq>q%_tr7N<&cCd3Gr{#&A{$Td>medmZ?(JJ=VJRkKpl_{x^;g$-m=@p)>Wt;qkaG z+dK)O^T#0}`p;n$)Fm#$q(FDgJ^WfFy*~yUaNDY5Vni0IiDWE*DxhRNs%(xqoi~v8 z!Ns<^$MzVbJ=z9%XwjdDAcC^XJ^ZL6)T+r++JH@+{0i3zfAGGo9WFs%YCnPcFc{V; zzv40a*e9QVA*G2VuL%s>@4qtEXxxG0J7`t6JjM;lP%CZT$w6Z z*Zs9}mPo3nUYDok5%rX0qRyIGh=zm+49;b^ecd266~E#!L7@lurEKV4A6c3-i_I5s z1pF7#@qO;GyD{|_&HGB&EZhFY6?e&})_?({Rd-a7@IOT**FV6$lJf*l__sSLIZ4{S zLq$B;cg@zX=-U{@wPHd-$sm0{Z&T#;#~F8T8{HtZ;JnbK@F||*aNK02rcINoCG9C> zsY&!h(OHz7F7I21y+2RiU9$t7p&W%hQ*UZH!oqn)Px4#GBjw~l#?t8;Gdk%MX~$kGmUNa&YS zj}MroKl1ePFYqmyOn1nH#i0@}GZlH;rVt60Qk&usdGMrGDp@HoH{72zLi$;&#zekn z1k4UFOSc^vI8^r^uzS80{ZS>gouT9?EnT@Tg#G?hzkgI>FG)SaM>Xu%jx_!*L_{{~ zXQkaYLF(_4^^cg&ynkx5q-BhHj!!VNXNEDdDgP&!w9}HJTnn?F!<2XEWPieS)x77ol6g#t#axC+Y{vhPRc@q^xGCdiUgf z+OX0uE`v=UJm7Dv{>#F*D#$tk65*YEP>|KV88tznhco2Qs)p0Z=ZuvnU)wl|t;j0jm3$+yu6YRJi|QpXKqd8scFW?_ z(H_EdmLo2>Qluvrm>*Kt@Bdk1Ce~S z$2MUqw;>+TeAh1YW@;$jdRMX9%M|tJF5>QpI0Qs@hQgD0ZX&G@;4PY zKirDtx5{}^9DAD>+6|P%n46i&()xT?=qu%@XXrHM76EAFDk$Yi7R!CP>)_AEDCZFL z!+@_l8sIxQ8HAZI!Svn`{(<$*BthqtgFD2JYY&M-A5m{eq90@Sc@s;S%0g}(9Z}~k zq^+$Y+c;;dSixh`3I#*UY8E0Rkl{0zK z8vc&59(A|r@^vjn-d(%GuUTbSR(cKP;+Ba7_Uj84H)t|bb(<9MSYBYKxqWq@uFxlT z{YeXF>YKuGtqKJT!>;GC`{fSrHRP5o%pUGAl34dzZH)_^=#_gLd5*yfin_}CF_p5= zcrgqe7S*U+q03c@zwkJ&rpUD9PL@5;1t}@$!$drrU8W8= zvW3o!d$wxp7SoPMu)DmyUhYtF#f1n+RavbEChw|MG8V)ATX3?QuGKuY^_opkLF>>EgtoD~iikA6;oh{WXuajx!~ApxCO>t}CrZt1;(1XmrRkh{H;1;SGj~qk zc4?pF+}MpbEx=s%_C5qZX8es^Q1kBg+%r&~P8NUAfNE{kmftuU>M?Z0y}=#~qQ2oj zSGT;p14{z}ht5G(ZeO+2n0FmE^ETzxa#YIWq^%GmrLK>t!b<5YkPS)`b1<*@O4e7R 
zZi9lXz3q41%2J^bh8xqsUae5|i8^P=P;Hl1R|t86+&wcDdsMY6F>;Tcbs`AUTZBX6 zQ0eR5{F|i5L|!+GHxB!r@F{v8W;B62t8Ttlajt zQc2Ti<1{AB2NUgB>-gqMb?=}w;4b(|5o15E1y1ewz#I-G&JDEK$2XTDLx+WKn&`^d zcZ}{8RemD+ti*uFq`m# zXJdm^=tpsCPg&;tj_A}o=T}QNS5V13cGfh7_#sg}$Bny-ZtWJMzh;vVW^_egwSW4v zO-_h;(}3Ina|T_yCDkC4s-Cl-=YyDYK7scPagXl;UXZEi z)CzuMPks2f)*}4CJkEKXfnu_(vmO8j$`L;vX0u%-0N+wv(_vni>WhkE^aRj(IOT|Z z*e5xXE9otT%0*EIjPW583YmikpCrMwP!EBmc-?V9In#U#1~{n$DDB|tU@Gt3mpe^DYCu%Io*yy9w)OKKH(gn=PG=htVc@FL%2+OKK_^0k?P zeYAo;hfD-ltA9n3QJgu?(-j;oWBZU-)H}g0bG{s=C@}PfGcE%N$iZc^mj6h<#1=de zw;ZTDeNM9|*J=L}2lmSdt49h)EuGs1N$_f3p&q?qkm~=4n)H6S%UIpR-k){LlJ+uP zG{Tp;$T<<~g|>G?>t|}nuJ^ZbF=B~?%a0jeu!BlHGEazrGdZ1yZVxrE{US%AqcAIy zozYWctCSUN^qPxND5NzoOv(hu*ers(7{!?T3Dh$~7YOB!5#0b3&uU=jZ-DdcdCiQ_ zVL{zmJSk(C&yHV_4sF9;S7Wc`=zu~m$@rkfyPzx13%iw=JD)jZtHKUOEcoq04e!Xk zT&jOoODFa#1&F7?1>3U&lhaxd3{)Io9Lkr2*;Yjdkq^#4EeaWs1>J@KJaKXjJRE^U zR|>v-$F*dfMm53RLuv_+i3BakEO4LZx@<1y2+%SVQTSdI=3NkwIEz=I1axEPnP+(+ zAc{G!Ro6c5lJov?V2Qk$?pcNRm>nRTDsS{Gf|XXLTsI!u!Pvv(K)iSvK=ku{ZxCzM zK`{u>5zv1#5LRxXRQb}zU0^+VT4qG6_JNvjH5TuJdOc2JTz(X0^uL1UWyFGmKA07V zFUH$(T?U8*obX(nBY3;u_XE z7FIT(v)DOkbp!N(F7u`ij*+e>p^7$?1L8mob6tdOMN3jO_ALdr3xc&y+adYxHoNOe z<#+cG-OmE6{W__N4Ah$--a@Shs^Sfo8jE|&rEUskQhy7K%S>bDDA&;oW0;4aCcY*4 z>2p~fj$>Z25p*+DZYO<~R)j5t=bm^K>E^{t?#bSv>t$2=sT8j7Zd3uFV-)&3RuKNg za0NRNscv3sAc1R;ge%#uW!g^T5Tj~OlN!8yYFmN`L09;7Q!r;Q%%Inu2y-50T7IPj zBZ<1m?UNzCgGZC>!8E&GJzs80uj(&%K%4C_C+zD*Fw&BpiaD+Ti;`VDzJ+*J0fLP@ z$1iww$>8SMsnZ%2%N$4;uYTUt?g|$Dd&puKbR|?*U_c|seE2yFd)WqxcMC9!=82n$ z33wDr*d4y>B+ePuSnVE?;QfvI96%cgT=M3AY!4;kALCTGx}_~OUYc`${i!R07b2npSUfY@)Kgj(>`T|DO;wO!~F zeDtl!z0Aw}t#%SCsx^@=3-v)7f8qT)aG#GLN25cW+Y?AF)>qjMzZ%z6&EadSrg8LQ z!fgZXO7vH%u`hZU3)yN-hcoc|Ky&ugvN1O?5yq)`Ak=b}Mqen3FS>|5ySZ2ihkc$J_^s*!$kJ zkG;{y$pX7SQ%H1@9hbl?U{!E*6Je}liZqWIyesJkS|q+ZPpQH>n5l!L{+2oHS+1Zm z+DQUsZ%~lb`5U7XRxw(m3$A2mxVwJw=J6fEU@C0|qX$luFVCA9Yj-fLV=jxD;G}@1 z5e9+{<<7oGUJDxn6U6@#vUMn#|FyB%bk*2Aa-qJ@JWR98TlkjoX5L9T@slSkny<)9 
zJmXzH$Q7@UKN1T%ovYe6!Xu7VvMX6rBJKd;VrFFMb$$v^#Pn!*1Mq3T`p^Db&c=Em zt#3Vh-%stWp7Zj{%m0r)=j;{YO{JV@GTa^2fB$26x00U$)O?mIW25)9;l}2Uhy()& z9x~m(&mBk;`eAD1gYI)rm`>6K(CvAlNo_GOi5gSbGv*cwMKpicsx1Ez`r*hQS-x-B zk<0y)A#kFpgLdkZneFLKaMk|)`#U%<+sDkxLA37vZRYa_t?gFO`l?AXM;uN9O5GI4 zKEtw@sU`gs#&tPhYWZMM!IF{}V7#|`#gip%pNO<>h=gkPUT1Y@q6(1UI;kmYc!fG9 z(n4~t(vqIMRi9aDh=JA>VX{3Yct_7b`hkI6UMCM$61s$4J9sWpR!8^9QIDv)aA2qU zzr8CQGL4fZ5nHy8)lV56$p3H6&%I#F-jr=V_nmmDG-?N!iIP8VffS+nw}v`>`4Hf= zk`B8G{=XUa_q&x5>#u?6?*(+X<&-MfGr)b!3*6&u47Iu}9GQj5J`ugpyvB2!Fsmq` z99BmyRS{ZH_+xoy)IXL|Qo}EH;UBiw)FnU0D`!ks97LV%n%wE!VX*0N_YU^qswf-9 zQ~@A;Bb%T&9P|80_+=BcCR0Y@?k}K4!EI(safAD_kJ%`vix=5S#dt0fE-N$x#tFwj z$NY;uZzkFMA7H$)a*&2R@_jeR$;RLhsMxp+YA6@wWOeY0=ra^aLM_EusWL5nJ<=@_ zGjF8zho{T_;%N!$Q4OjCm81|7!v7Mg z)!Z7DwC?&!kV!stX|I${i z+j)h$5m)C8-Jv05f&Vsp7hV&)_(5yAF6t8fNw;6ZRiTd%T`7A)-2tr-u_ED&v*)k9 z`PLF2`_t6dnEP$LY9un@9-#Wz`KgZoc$a`a0Bfu*5|{~ z%T%`QO!pXBe4OduekEetK+#3e?z>9J&Mt4I3xK@4kQHZ(0M~-I;LYfz5-S;p>5)C| z|0Lw)Nj<2~v3mi`wu|b%34N_EnlDtJVJcBqa4KaYCj|H_eY<#^4Uhkwx{k~q!mOza4sI}*iomJCi5BFhtAKM zFIS7`H0@-JCxdBaH4(d7 z%keW>y?rk>+sfF>3*?pLRl@*g!cJcd``9-xqqDIiGH{!CfeA2O`#M5aG3b>)VhAw_ zCZ}4oohX?El)CCJGK581$ek4~?HfY13^FoiYF}%+;+8^XaL!?HlA0TLGY~D<#O!HV z3mUFgHD8D%y7f)!C&jiphjdvu^bM`aQiknw>xCZ3m?SFV!YHc<@(-3qB8LczA+aELg~+R?`96`8mGW*43#ys-U$@*5*Facfpo4A5_k_{r`3z3MmK zwOgTf)o*Oz^~Z0l;|DD0t@w@kyW+VBFp>68zcHcl#D7weH}Cyl_>Bwsc8wH>T?gpr zCCM)^5gokmOL)-L8*Zu+p(kyf%hCWfQc<~sI%iyds1H2lAdQ(46r%uMH(ES#1b#I# z9*t$}SXZw5q#1M9)(<;}2@V4b7V+MJhGV@AqF?{t(y(Pp3gM^L_E3-|XkvVN-W`YqhlX zEDrnHB~pWsglyhoM*;%|?*!N)N|Q2VEQ$aTr(s2wq*^TQv)hH6L?u)zhOSy_zjCI= z$*y5Kh4k5V7~@Nn&#rhEPz@9VouiX^AgEf?wi78KAX?2(`t)&!SIQy{mL&nEdshbp zJ@#0$jj=}m;|KF<%$?f};`7!qHjsr$?`Ts^7sO|gp730xN(%Q6gL=#7E;1hGw`m@hLI0uLLE($1;PB#QKiLaC+R&9EhW!1L7*c{r1iz~^X zUR3+v7>+?<`x4|A1Yd&C2wGA7_g3h-m|55~H9y<(asP3vYbtVv>Jn$@j9AbJ-PR;j zxz**DMqz| zTpHOZB6f|3X1d}fN6KvKvh*xw{HHy5*VIE>B`|VKbFEq+d)2T0>R%e}uV5NGo|W2b z_75IDBdvdEvXAVFbS7HG9G9qEsUHH!<$>HLi!YLecZ&oPMB&zLr}>%U#WuWc(w7pz 
zb?$=d+260#Ba9(Qg(sF-scuuLVNuM~bCZ3u-INX%_qCS6#Xg~21d zxu9X*`h^VIJw$74#Tu^kyt_pLV_S9#v3O?=wD8}X9{IXXAFE*N7W;F>{}2{#^Zpqd z<&bUavU35a7ZC?EP!vjsKBDL>v9Qvyx4SVYoA>agA)|Ni;Njj#a{?|*kgCUQH#t2O z)C6P<)54hMV)Lw@K_1zyEeZ5}pnuE#Z6Hv!qTYhG16@<}Hk_vxX!E;Dk3OD#v`bv; z9H!5Iwn{ye(@>htG04TDgNT#SM#c1h;JvTfCnDgO@l`~}K%6-Oh_y{2TEBxyN7 zn=@Sb?lEwcd+brmvAw|1?vSErTS-0VkU5WR_mJPVsbQEJl@*oMpiZUo@l#gai3yTx zd-R#`=icmUl+Og=BCR^i6Is4{MRHo8)h8tP1MeqJ?!EHnVn#=Noo()IP#oM1i~Eqf zWp%|MxdxNmKPdHhf>S!Dr&Jz({`w=0g24vvRgwHX?W@0DvC}U)MUa`Z?+#&csdn#` z2R}5xrH;CIV58MDv6?+&FKOVEF+vTfgXqJ)p6pb1UkkyJU?d!*Jo(^&))R8-cFI0E zeMP|1)wCZ>wX%Z!!^~HnUNJKNr;h&>po^`l#|w%7-25OpVYR<0rFYhbJ%tq&RYqoO_mEn zCW>hI1<~?+^=(n4@0eUL7@#5_%UmcqeFFW1KsccxWL`RD^?xg_q2k=lrC`Yds_ca-Qn-VEEF(odX)NtB*3zRFis zoPigm%vuH0pD|s6H*kNs81zFPn~p!*@*42v*@uVHvIH;}MWrS6gAR|VAT3&xj5tka zG)%KgachK%l|}L=%q zC`;IP*y-c7zAJ9Of+wKQV?1Ef$?c{6VN5to7xZHjeIiel6dYO}23_h!%p^}HCVZyZ zdmpAb=tOu3?NVbo#t{?qb5|{B=f<2Y%$!h2MSb62?Ys6Kt}$%z$);E<>FE?V#g(90 zIz_dgz2}g4Gw){k`l!=S!_VBidpohsP0cd3cll~8Hu9_1_QH^h!XcvN4PH2nt=ZZ+ zQI}nJx%TxQf&_?nm=Ui5`>p{>*9OLZB~Tx-7RSO!F^@?P`Q=QHR_|3$xrqmAose5I zcwMX5yHgVso9F)rER{c3)8>>a78{TWbswftmoZI80vE2vy;Z#%a9EF(~ z^WoM<`EB;GN8HPPWuh+>&%rBETgy?EY3|{*v>{2DdpPlWA0#)_g<&wsFgr*Mt|@0$sPD(U+W{^PrTCX zROxp^Xsz6F#5U#ERCRfUnygB8Zlrc!^=@D1@L(oJ_zl%O0TjmbwxdhycauU;vblkN zpUV;XLFX(|#~S~Dy3>Lz)Jh~a;Otn({XFr+p}H^HZK1*mOBk6z9lCy zdOM}7Our?x@7Wx7MFpl0-5##=lwn7!ZrMF>RMnaeE@C#Z?o)k$+Y7rUT}y1NsNQ_Eat+jSRiP6cM0D;L=qBm0++gP8rgQTl%OuF{TduU(QBZaE-MfF zIa42B476J5?55j6C1>@)JLuJA%>TCs+xV-17DqqGg5JE79f!k{O2fp^d;E_cXEla+ z{w9pGRp$BFR)3*SCLf37^WWkdL$;|vHBpWiz7Iv84=3Qi-Akxs)=jOyioe~CR#MZ( zx4B?%rI*TiF{y6uu5m$X>@H5@@?3eP=@L}@$Hw3 z<%8>Ue0-lRJmsY?@?X#NRm+o#y`&^c|7}el`nNURTKH9r>G^c{=uJPP-}TJt#3=V9 z-a=x`oy_F278z1Xe>JOGEuwhua^LNt1?s!O>v_oQX$6cp#xDgP23{2PgShn>xN?~} z7A$chnK6eCH=SVos?r6m=g%f{mT#E0nsR3i9cR~mY6y9eyvg#UXcR9{S@px0z^va; z?lGxp)fY2S5oIT@|M8!r1I^pZ4#(@^b$X?w=k(cO%uR7s`#BfQ7gXku71;Qu?cQOdZf19Lc?_2O+TM! 
zidR}bBiK$e#IdPW*zb0um!0$fznFXTcqsS3e_Y!^PT5MOkZcKQL$;ZylPxKdt;{Kv zGDIZ1sgNXV*2Gju*0Bv)r?L~WWb9+##x|HS`+VO+oloc7=X^h(-+kYY$M5@B!pyu~ zGuL&!U(dDr87cYdU9v3+h5kk9GRtUg-G7V-35y_i!4IL}c-sly%nHv6!cTm-*RrrU zdf#hR+Ko)X{MuQY^UAjloQO0#_s+%2=e-)qLa!`_%ixuCiW*&Adp8u{y5tcQLmC#L zO$^0b@4Xk!Mcl@+3JmuMPlqQVOcIRg{f~M*&#=tS1n$K3LID>JwV6#SJM_oYU)ii? zu`lZHrv49Nu>(NUEhV2=7z;nt6BqPV*V>@x3N&Q2$CkKrcJ|zZbtiQA_GcA{zS@;u zD<2OACo(RO_25;z>)i2HOew1S!CojB`%!bTsT$RS#5);hTpIoM0sZ8v+Usn`T^eXkYm*l@2c85IMyv)S;G@nP06Mc-Uc63W$%k9wMQ zW-25$%p?;jGvodgH9AQ&=!~@e{0dQ}g&V<*b4(XnaWOIv?=^P^gn3clQ14GYxn1 zN~9`ZXr|J`vAdRzsu1G*-`Fi=yA4!U(%(#O<5&gud^=UT9`9d78vU;S z6=sre5LDUjmdDZ{c==4Z4YYTi)X-py$}!`wS7&jZV0{OBDzlG^{r-ewsn?Od*PQnn z{4Pc7MKAnlg={PB0%Va64}p$U6$C+YYtr~8-c7vr`H(Z<>q|rTVD9u}2QDEi?_Ly; zErq#kyOc0b56{Lu2z|jjj?Nh6mc>E_Fe_H|V)sOPHy`Ba_XF_SaXr1P`%gD|Z|!e( zchvaw3&-jvz<>wVLnj`uiVvE~;Vy1|=miLGvdzCp@mfHNFVz2o*(V#AeegZ>5O&$0IWWN9Z=X?OOd?l&4>t5VN@ zV(sOJ-ag$*cVX43RWhW29Z;en#g!q?a!krgneN`SZtg|kSXQ_Rj~2Pr?y=CIpVZ_? zG}O+WA!A2bt!I{PLNmN>AfDhku{tO1VA;SCQ@r1YSfr!d>>m*4XKRuB0|*%XD-fV! 
zf;$@o8%XN)?t={jⅇ4A#DUfsEcSe`%l&6fyQ?fDxBY7+0#olWrCI**iOE)PmUQn zkI5vk?a*sfG!kpwA%Ih=j#-zJhZYOn6|>hde{7^YCw-w+1h+M-8*$aC@E4OTG1@ck z^rWXio-B+RV_oVh^AH#WlbsiXO|(XgU3_~2n1F@h(3LfpZv?Tmp{|%@X+GJ*$!4Mb zij&a5!MY2Gf}4$i$UAmKF$cR$!tHctcCE~}+GYX5*Rz+Wf~X|nzz<-6i6E_G6%tL4 zC%N)k<6bREQl1g~96QL%wFkEZqwiVX(PKMV*-qcZEWg4NzM0?0Zn9^-LiygXd$IP- zeMMj)6qw5i-XHqy2TY}|6I2Q7<;D*5^~@-L-FwbF_P7mn z?bT=aeFRTlV=8=iL7R-Udo4T_VNbw<2JP~a4FCYCAo_Gsd8Al5oTvNy8Zcm6jFot< z-sdQ?*}BB3nx#|c*aK1`7zP%lmqRczvlz3EGK3#8A6S`f@3Y;|y_H2w&D*d-Fj$yx zx?#9qHa^6D>XpWkW~@vo04IVU*>1UYK=j}!XZ+3~VYbG7mF^hfIB{8j8CZKKp<(>2 zvtD$#LL9@53@sE}^m?o0TTT?Y%4#PQuI5)iJQCd$b|}-8)ab=cGIPF@4qU&26{CUJ zZEI7w5nW^eVfM9v?fQ)%s@lVJX#T+U%T_Y{tR?5`t&g(}b0prpDkrj0vr}MAO96EK zbArj--|Vn6EGHkQE7v+PvPbWZwCY_FN@ZFx9DCYM8`0f;MJ)p7334Ui)Z&_xdtCpw zAVA?f5O;X29}er$8rU`hCz!qY&NUK-iF!rm!QK;OSw2^%F>Z;f$!?Q=+L8HFSE0w5wyZHX zF3ZJ97z_w-aK~89RN+cuBV5DAw~o##Ct%ea0xQ3k`Cg`r^dN$iwr{;^R4lXx5-fZt&Ho5x zOxManJq41_kG7A(h1aFgfbw6PshxPlqbohh+#mkYK|$HhXU`7BD_=KJr5Y!a&CwJ}h{~@Upd6T$-6E)nt4SKEIZO)jY48{Z^96 zJWpBmb`}>Do3LPpytOP*x=eR?V~&&8VgHi125=mcpf68GaK|5vnK(8&Z<5|WtIk)4 zeQS4TO`1w}pO;HfcJsep;%o0^ST8TOQ}}x0A-4tJJ=%w!9%BmwKI*twiKnupWw$1x zH7atrsTxxsA#Ed| zziEmUT&~o3du=g~N2hPom5^ct2Z!}GZM%t`VDF!M8$qcE=*+A>=q)|ST-*=YF!U>* z@o~WY)EYIfMBzlXA3FIWndV2{{P6hNAYQU7ad&K)!m@Mkb!ja3)x zYVCnd6Wbxdj%;1Jc|A7m>^iZ&G*WPrsX*HB(RN8lJ7Z;2P2|*AuA$y}nfdU2Y&@U^ zd)?TYHzDdotbdRAMt`UJ_sUe)qY{m8m2p8us*!Ioym4A@iIiw}`NPIItR4ZxY>g%ILZ>F@6s96E1*^ zfN=}~+qfNtf9O@z4x_`xzjpYiXkeo^OLHqo6^Bnfcgwi^MFDT_Ij#G>%zQh!1&`*NLQuAC!zRY?vdEeKaJsbl)cMZXEsE zz`M)f)NRRz+RZm)>f+9sMj8jX!eXY>xyqeCy$l3$uOofxqy$d%@UjG$eT!1_qC*sx zk5A_AJK;bI8zC;#*arL1DCy*a-ayTcB4|HjygG1Yat@Wj2=(cM%rY(lLSl!pQN1F^ zVhMW)OWAR18I)WQG*&oJdly!W z)98F&HU2(aKZOvPI7rWOc7SCvlHD0z1dmQgL4|OK7>o&xHX?pueB;9nGBbvI%rdcl zaw*_WvyPBJqFL(*$s|GAEoJOAIet*K7ULIjdcxLoGVX3&n%@~2J-wG|5rtc2DTw!d zagu`9G$!{Gwm**XdwU}6wo66!1v^*)hWMJ-2P*3^`Jihec6;OvoOA9C!f+2pRJT$L zIQOi*i+asI+o7-p4)i5vBdy?IjVTYni2%``u>-jlaQ3iuOaVlk3SeKIiQQx*H9t_6 
zFW=x1WKG2e9TV;Et4cqsU(`g_Rvvs0qLq}XM#?hn-j)EdFuFOYg~2C}IEJVe-P4Vp z*hS?7IXZfq5r&mtwrjWgiq~2opWdCu2q85|7IDlbo7NsNCNm>!cutY`UygRK6Bv$< zlS|)m1Z7`y*dwWG03a#>@IZQGAe5NnZ#tR7MiO$^yWfc)Ux6#g4=>7(i*hl)aRRDH zj4VvdoT7Mo$~9pND0WKw_S1(oj^2lp!s@~+aI4+hI6iQ$tL;y))%N498S4^&-uOMa zd0oD%uS7>J&IcnV)Jhb?NUUAD%Z1EEjq0qWPfM?kj#AF$A*YgwdqcL`gdHZT*+}$c zVUJhlQOiDmZWO!cGq_-t%z*^nv2{oL0k)(4wpv$u7o=iPB&88E9e&v7_}pz4BEa~$ z#pf~H>S=B%Bi-!rV8n4&!)`(#hPL^_6vD2+=V8VVZ~LZ)(QFMscdA=q!0D_QUFEru zY9_$iMT+#?^_V~daDg=22MkJiPXo=nE?-#g?`)V5i<}?z%0nbhJ_t=>q;C3pS}dap z?ew8YAV0jCLV6mZ_&m-N)?QV=PCd(W`(?ht&oE9L4p6xbZl6X6zVTBQN@N~Ka1<`s0jBjL(Kcw& zQ?BvL-f-5-@k~Tkp4WWFX4);py9cGMF=P-^Ml1Nh+D3CdC-1-mpYO4yD&k=#a+@B~ z?g7)9z~#^FdR`^(B17W2LdMWP=at=XTQ|c`zwTbMp;vsvzVjA>B#_Gdn5S5w0If)M z0EgoB-GxC!!qQOid2#eIN{H`x2u)&5B(N*5C)g1_uXHpL1@o*3LS|K<6$NvAp0MmF z$;v{&X4Awxo8cdQX31}V#rdFmeg7X0^zC{@qpx-U2_)FPv8W4z?i2CPK$iQ=H%S0l zi%Okk8e1_>j}Rd>6_b8t-DXW_|86lk1{=q8TJ!uZIuQAH=s>m9{{S5bUqc7FML(C3 z2C1$q3lukO@sS|_3%K;tApC4tfEVvNM_TTq-DSO#`l~)*GEl=;`LFr_=68L7C^*X! 
z_4Kc+m2C1p@c#oWaC`ODI|oX=*%bEo!XydP1u!_os%>F8o}s_qrn%h85W|y6vF=Y0 zVUjPphy`?vM{VRg8(w$d5e5=gAC{v}!lWt?)4oubULwH9TNhaogn)iT7xVqU(#YJ` z?59-QPBwAKj}Z<^HKqf%RkMFcoRGA_)C&ha21Jw=NJv!moiXCnSw!b+)f(YEpFiq} ztsgNfPE*E_%s#9+QajVhFnU{=#|bnr#^SCcLXw#w*e7#t+J z;Ao|>lKT4a>1dt`wlmhTSp_Gi8C7dEYQB~x1-77J6LraK)3UH4B&-@$I5*(Ml^*ng z&{e%FXXkc53Hb)C$7i?90D?D2iUh-5$o{kgUspNui*xWLdL3 zo|D}qJ)#Z*WD8T<%d5#A!@X`WT$Tvg6+&Geq(^{^Ck|!XW`hwkAc^3$Y{Jt;P9+E9 ziRHkqOeEST;MNS(++Z!Ws9j}3U@6{&5$U8I4!>!w3ecVh@2%~4M18yl47iLO;v_-x z(CwA{jsKiScI1DMM>dOVXG=Pc&jRM#NK&b=XJS!ee$2=@@gMe2Gy?zf?PCKb>#gJ$-Za*lFR>Q-t3kYiKtlW zMCt5yE5fOP?cnfJR#+%5A8OZ0jm^KGEsQW?OZaVBF*!)K55G7{olX4;_U_8wpCUan z((ucbUHktemyf$~{h{Tf#HXB%?)yFWeqvh+G+9=g5vs28{{jXq8*ERA$Nvp5AcFai zfq`sF;v7Iv?v#_g&(ig00gJ4wcP?1SC({HgA}-DSC%Js@S~LNWNC3IKh7=AA7)iP83K z9%IW2eBei0IOIt}DPam2z}1lYCCCU>zVkaWUQ9;FWeHt_Mj)7XZ+urQgN zGj}B1*A?>(AU@jYe-RvLD`h81JV7K~L+(PhK7-}$1Xxl&MSH)|tJ{G)-vgryZaAH3 z+71J9dD;{1y{TUZ*&xMoX$i~izX;~PMuY$QN3=U;0tu_U~|dwBWR5IBwW9ksOIR% z#spR(5i9>QrTizFOy6CkZ>dj42JyZXBgzDIrF+HkY04b=fe3g3@;(sxapsley%(&z zp}@f`^Ymp_Iq8TIN@FFhu>#xAo~R!t3`AJ|rbi!mv$UWvt}PAuQb6y6qMhMcN;rwCPM3J^W%g}g-8r;M4s^BrLsU7}N;jxp5O{on5SNQI= zkin7{S4#%O5)^N|y~VHi2#H%jE#QzA^>K62^@PS?*Qjj?VXPDdvAH#vqPSvB->{Z} z0v`H9##<7wW}kJ}`7X&|Z}LGs29UEo!{j|Si-2x{K_!I%rVs`Ilclfc@I$4+;fH!S z5q|VfxfOiCtBU|a7s27z^geUw&dyChAF#LzMt?LV?^-i`j9{u;gRr5HikDM(RjcpA zrY{uun=`F^(_4;_&SWg~V70SZsVx)J^R&|z+h@?;nu}OUGc;T9AgkziMYejrxHpz{ z#pu8;(FDp(qi9sNBhR|?yfHW|hEm)O@O0Fq(LhAxrdtwsJBJJR-CEwTkGICX8>Rib zW}LIQsoLxbDVpUXHS9o!yl+ZH5%g*332Y4%PK%)w8m}OfD@o1o$`@VKj|FzZ;==M( z5F%8$EXNF!&jpUtUOLD)*_uEVL@Q(fFs#-Fe8x>-9Fr-hr^lR|js%Zroe(>b!)J_2 zHcZrRR6S`xLJtvfU>wKX^wl{+!!niT$7UOCt*UvC84aZ6lohs8v$7rSz*Dms`|EVi z^?B545V0PVM&HSt#p%Xtzjxw%XL|cg-f=O3voIb1s0%Z=d_@`2BA32u=db8yN5G1^ zezmfhR?f@e3YhL4CD7b;ugW6(tXz1qwBHt0ey$8NjdTAa=}px)5oEHh9jJnNU6CEe zY({Ef0nE>1j5KU&Wwx^eVu9{?lxFs}N?`v{>0593*2ZVij(wG)&OVhwvoQQSl4?@; zUkB+-($%8%L7IasSN=^T-N{_Iq2#xN?wr~CoP(XXIn5aRbE$GYD%yhx_p2wXJG5dZ 
zF#GHa6#g)svucqpB|`jfylrsbQ{K8sAmt4RGv&*P*~38E0c_{`gkg|O$qx5p8zkBv z<(H2(Z=ZR2VR3u!ZyHkK;-~^Y`#oj{MW1GzAN@4gi`ZcTPaF_=pE^6aN`pIMWK#T{ zPN@t|>&B66U2p^F`r6RZvl027rataA`}mC?+rr+vT+sbCQnl_pFHZuWb0^5fEMm|W zgZJz+XCs#^SC-otb>Rd1+!!*^(*1OeeX=a3e?M3LTqQH64E&GZn44fWh@Gi*ocpd( zJT+hrfr8vH`Tezb4U=Hk?@oEib90nG=^KbpqiVc1J9;2x%tEW{MYf>ZE@Og)&hbsk zhnMBNkLPwFn7=AqTi%yVS2duRG!!hgtI6Z6%cq6+Nl`vguYV+2+{g^enzXI)eH(^# z^gi@lQ)8y@MuBy~Zr(L-VD_DVY+i!lCB=Cc`bF0X$VJeT^p5~s5c?c;bf&~BKPaBtj=4s zhZY_K*jDw6KRM8y%-^|X1^8ve9Ar<+jP+^xZ!GBF74sQCE9Ot~`rJN@-Fm(HKHB&G%C%Nta!6PY>D|<_^D(5>8$lYI4Pt#voC7zoSn9vL&0j;^NUsn z!C$e3qJVVg8eJ#daX(1+g5B(?`6P1m_);+1jAO_WD3P@zFKiS?Byt74ce*N*|JC>W z2{rC_YHyJ8Z59k^4cZbm)$O@b3hx3(-t6-ci552TKCN8xop^7lG18D^)9&rsgR!p7 zGcdaG^3_EvwQk#M7cyW%)-E)A2M`KVg9Wns-IJA91=of3sY+j6omrm#MaBp_8Agrz zO&@}y3l=ZZ7`+Wmgzq#KkeIf7<+u_SI| z(xvua;-+S+J{PpgFKGPgAG7)PTA&+I>)R-fKZbA`Y4VxZX3l3&drGb#GnF;|T7P$C zm`~l(%d5j*8+-BSHf9U#M+$gJMZz%~L*@UA7u`mZxvZ3vjf{t7k<@M#BK84Ie-^?J zA_Uz0?ELRj6SFVU&wmgu2vd!Z3u@6lf7GJI;oEMIxow;yRr;s&??LU^rp24KtdiYZ zQ7W7rE4@#XcW<)7jKM9u94Jtt$unU6UQRz+NII# zgAfIwmNswLMs$zoDIavlg&%gk|AAh%2%e|99n>4G{F>%;8`{L0z7`?jBT_W_29 zLuEc52CSf)qAni7Zf+Mc?v3D=uDxTwWx)2!4tN6^d}a&1z#HDW=r{!@=^fr2b5a-o zf^z(d$-gxCtQ$tMh_Rnl_*NsT%~G{?JrJP8|BwV&?(xMJnt@e zgkgic)RI1~hPhllPke(jHLx0WTI>#KJIRXi>n}iUx#e$cuqUuk{nHdgc4^ig#FR=G;j@0%WC0k1;d-<~K%YNYWmWJE6<-1w0V zT0)*L9*rdA@%P8ZjgobFheGMxYuZPrEq~=ZerdoQV2AV|0YA5nVg8VnepK-9q>?u> zR>l}+KE+0LgAgMrVqT1+Sm01JPXcpWL?Mt2vG=3Ms%-zLr!OS$eirnOz7T=6ByYa> zA{v`S#zzM-FO{smT+8%vDo{7hJV@|0IkfAT*1-~t#I0Cx=f+7LZ{lZfb;VjizXd+< zqRYQcBf`;zeSZ7jB#)M=GH-cFIhQY87r9wAF$%rYq~o==B8P=FlBoYLRad}q3VzHn z$p7bjCy<^u;+QpLQjh$M5;F!-M5sw{Tr^H}sGXcYkfKl8paC|Dg%x~A4vtn%apIYi z?}yH1ArH+wB5!7=^@^t`LzT1Qks6g-)5(tVIOc<-74g8Q^Kv4GV)&X6O^V+vYLj${ znl#JzL`-Lo#()pvV53$EXos0s!%)&(H;&_kW>Zr^ty+>Tu8pvajg282&k)$z5xAf|NE+Is^=y)9TC+hiG%qObT%+D;VfQjU*cBk~4dd#qSltPyemlEyFgj`j^ignUtdg?l9OH2y zWi4A>wt|VeR)l4EjT%riVzT_m7OUy_!*c}?I3c<>YAtB@%?CvJdeE-I0&6|*C#eR> 
z*eh$owR7I7P;~6#>=(V>DLkk=(PvnZ#;dFD-c2yy{<4S?GD36=N$o4_E=J-!a}(yi zX)S~)x~5`&-bYq$1N%rJnD#f{xu`3&hX+C@;L?EwXvJZo zLo}S`HHi(zVx%~(pZmf|)S`VM*_}gp|7UT*rZh+&Yke225nNmOn1; zz%K``eExLNsQNNOigvc1#*5+!ysf6&PLK7wS(Iw}K8w>BcWrKKdI{TPiCZ&SI(8M@ zp@Wh^$^9w3duWqK-_&9m7VZSosvp|Tj&S-cRu5c%;<2>Eh354bXw|&*`;tkvT zZ?c1j1U=-$w@eDw^pdanY>D9mR}pt`WnynG2Mx|}v#(Jfw)*cV*ZlDnINknQLfC&+ z*P*f(F$5n&y`98UvgKh$lbPdA2Zn>URCj0hKp|(GPvdO#0Ex41w1b z;g@@I@5RH06CR~9i(AS0cz;g=RzyE`kZeuEobnWnA_h{#-%Xa_DORnhXN+pUfh%G* zyo!KukpYB@L?vO3a8X<%T+kc{5nyt#wSkEq`b#lcK0erPx2qOm@Va=}#k(qTZ7 zzHn#v(ICquvdiVNe`Ff1bj1FMH2S*ogMGR5FZN~TXZEG#wAf3)zPJ~O+_=~O!TRFY zDBkgL%Tz>W<(CRc;|fB69qkuZaW(dv`Qw9EkPGA;zrAnEk^Y`(bnyZj^M=dE@F-2h zchiy3)j!7?t$Q*jU1r${!XSD4KZ-R{GfGj=6&u`ey=M#wm!yTrbS8u56*x|C3? zex0UafqCVc2PO4aW-Le5=lnZTzRqwQhidwZv` z_}vNky-N769{+25$uxvZ&hX27e0!Ev>fCx2NrtRqx*`$&Bhm<_z}`l>=CTc&OmtW7 z&tweYTCKkz>jxck{jYS40*c?bX+wO0(L1$ayDyN58`q`PBvfl|s1|<(Wyjz$U=WK2 zYOx~k``|QJS(OpQs95HeY?vi3G5mHv^b_le;!S8!b1M40`JXnAH`tp;r@7;Fv!;KO zWh7dQCwnji&|`5&CUH*T#bh_t{U9w!o0XBMG?;(5=S^<9pm(vN<1}UhQ|^;~7t{{< z1hZU>H8W=q*(nE5Yy%nJ1C3Oe4gMbmIG4?P5$P2|mY`TCa^Sj)0o{utfl?f^nPKj_ z_P*B(f3+B;h?xT0FCy5ADFl9ye0oOz+~xQ~yspO1+~>H-aQ$1d5oT=frpyBrFC)jY zS2)nGj@=tmyl?C$?`EU+p?#9+bB`h3c!mrMo>wm0+Gp!BD+)ue(lPJ^G=`^SsDazX z#N}%sv9yRraUj(K52y5h;b{Jo-D2u#A&x&d8Wc!XT+|v9bNNK$`|oWQ+h9S(jQ|Q5 zTJZZD#St1(c?zoO!7|0ob)*-!n&|mtjrY}wgR+iyh|`h|b;Wr0b$rrEPF!k4GALnj zGfe8z7+MIT)p=taEyMI2e*-n0ub2o0LLiR*+0f2I`sKJNU_Ii~pI>;|1Dr=xw}vmy zYdWF2Wo7L~>b1@*^=I-<#O`rJ>W|;TJvvz_WNNT?{TEzd;^-2I9Zf2jvoQW+O*#Qj z;_@2@%3x2zx4!*#4SH;w;};#ZY99$5TiB(S^xHGUKJg zVy(&~s#ivT*{YF;Wx`xr?Ps_n5Of>4gBL3rhC$ zy3~Cl4&D6>gjgC|G(fJlE@9pkLB3H8ZQuwJm#kq!4LCKa6|wD!zx zI=UFeZx1jq5)%|T;JZN%M?)E@?R0kkX|U8`k+UQ%A>Q%(nj-e4)^q!J##CLI53m1( zI%6(yj5Xm08BFg9juj|%%q+jrH)3K-mlhvyMqNkNE@5g9IlhGLhIKg<5dyUJ!yhCQ zVsnZM74yh%a}nk24~&c=!)Dp(qfy|+*G^LwoE8ECVEjJC!H!O*F$?3wy%de{kt=mBJ45uDJ06|JjHb!=K$=m-=Tzx=gS|qYg7urk3AQx~57Qti>xLRMBSikao z=^cJ7=7T|odwR#;v;}?9;OZhda@d^f+O{Wk3)n3OjD1h4G_$sfdySJ5@?>(%Sw8VH2J3~PdN 
zr2|~Xq8mOKarHn7=v(A7THvxC^UJJj4I0JV-L}YT+(|$#&k$Qnj3GS=er}sa?^uo)w~d6WgeK+ofKi#xrLB9((R=w>s4M)epv*4l zWCq=ZNQb(vzpj7|S9);3rCf3OF{Qmz&-^4q<2RlRO@LUt_na+t@(vE~gzX3>BgUVR z!TPE2oEB(M<^+&>k58o59@O@0YRj~5)nB_7OoqyAfJ*m$ZFjuChkgD1D+Nu_ou9J zkU(2_h@Lm@4L9x6y^C1!O4bwsuO~mAiA=Itkn*{L!;5r-6uC-EP z6=fmVPG(ZfinuG$a*UaN5ug*t52lopZ^t8oeK;0XK&8=6URjz`o1VPLU2#ftgSVz` zR}uBKA=a+&Ig0m5GkJM~+-P0ZS*nG@(yS2*&3GebY7L?~#Jsbkyrmd-68ziWNJP~t zwjk!~h>oMw&=%PHW`jjK%nIYmgnK!bigZ|GVk+HM zg5r(6-IrHqI~>5qm*4*qp*YF7U45s$QA%&hF#Vb_M0(`wTACsSI*E)|%M_ zY*rUJqDJu~rNLkJGhg}y#i$8g^KE2CHfz5`R>m9zO?zQ@TrE)~5yQueU;5PFw|dJr z1gM5?Tf@0tCl-H@xXCu9oWF6)qfTo) z)Nuu^SD2x}9dap2W*p71L%54Xq7%6^X#!OsQR@I_&50H^MOy#i*aY*n?s5kfe7|uS z)d9JxejvsFNb#7LQk@WMH3xZ=-hQ zgy}DsHV6kfp}YpjmZS6oU&n$%_;NW}g_mzHyKW7rxvc1nG{ zO6y^wWHxYRyglDfbv@_p6O?k(JJG+&T{a^9be^fM!)&79wL>t05uZ@#Z{82U4hrtF5F*o$cItQ8`IoMVO}&iDFCu`Pi=dV(~&QMoyV{x z6?ZUgh&%NA71EM+%x&Y_@ak#^7hemUwi4tJ9(#;+=1O3Zx(Ig%bD+1AVP(bYu7sR@ zMq4S}wXSB)s0oB3K%FyqU)XKVB4-w=%lddx63@_(yi?vAI9_pT&-~)_^--N?9lO(+ zu!X}(lq-GjM=efUZT64r{=v1!uIS1f<*bxu9MXpwLA9w?LX~RUYz3H3j32Ge9|yx2 zqlP!O(M0hZoOa!H)0vrCzGYoguO;64wgEYpvX*hy4g}6^psvT8fV)z+L|mFHwxbbj z`3DkL%9HuqhHFs2h<`h8DLB_hu3cK#$|DE)jE0u44lO(m7asdn+|~B+)A-3UuFz1| zC1CF{+L-UbtmQn>+Y zuz5Z6%r)t@WjJJM?1fEF{C2*0t6T}mmkvwIvpa>LuDv34W}F5m!)rnvmJ~qE)s>`3 z#}5fRPz&2jze-#4Fm`%}hoP;5ZKBs0+xI=)*AWW7%kBleA!zvB_~#9!+f=Y|0PQ3XVS<2d9U+v?-#{ZIn6``mO-MW_rL~xF1D~6_6%n` z7bc9Z=#bys33n(!n<U@ZEpk)D_s2eI{G5VhMJz$r?Ul zK8`M;BWI#V@Y@TsVr>x@bv5rGq89Co8$KGC`M1GQ!q2o$>#yAbJM02zFa%f&RS z#T%xAk}POl2SA_E>&8vW{(t+ zOh1G4wlFLP3%M%CPd#QYh-iwlhQ<77>w>mcwvgY4Yys8Mw7sDCPx{$j*MX^NVr2dE zGeh4!)fwS8I@dwDh$EAzNXtX54t9iv$W!Y7!sRU5ca)P_52Glhp7s9d=Iu4VuqMs% z?n;`<7On;mhXHWDd~2-gK6`~bec%IMH!3w$2DNr<-rh)9Y0Jk{(Bs&SYFE}pvV9+x zF4H;JM@`!y#;xi2&}|5Go;$p|FS9>fIcF_nypXc7qYR0&B#m(6I0xd?`gd_e7IeZZcO6OTWVTXACoZM z1yM?4bzdr*sd~@r;ih$~NE?_9^SQ5m*$YmmT|juzQmucVBR9~E=41=Z)lO{fnb?_9 zm(SLZ$^f6+BZk}9zXxPj1oA^}9fe%p@Znip6-AeK@C9_8gUsPr16F>RP{P5str?p@ 
z*~>QeIn|3{2s4h-Y;*O>H!NNCh?)0q-rJsF5iU}QP=RH{932&k0TahGLFS4oGp!CM zvc!tMqRFxiRm@wlqwyOb9|6T#81mu=tgoM>FVRhSnMOr%q`meXFWwLJVVcz;C!SH@ zkfY2TQC7$3!ue&)WAJoe2&szxo!dlfV+QwB+8rHnKk6|!ZgB|pYpoFy(ULjZI?_nt zLGD1f1+$F8FzR1e`Z-CgHUjFYh=0_l=!JU62;D06$8dY{3WX;Y$6xBn6x)Yb1)Eq= z#I9DB-C-EZZ&V`X?tzpW;yF~CRXM>sajQE%nt7j1Vn4#H2KQ~5wL%sI;Xe>1rhr*9 z>oEA6l1B|_?5XOIM7Ps8jFni$fetLcpjJmWNJ3G-_~@gn zn4p~VW74v3?F zacaSDRO@f7yiJ$7m!1a`d5Y9vWsWXTDToJjZfq?Jygx1z3*2v#$8GO1s8Ba=usAn1 z5j|9)rMvcN_?^%H64|DJQZyKVm}1Wh3tCLVq7Dzc66m4Z_tv8Ctss1Wtr{-bMHkt{ zHeLQ5m>XSL`rSTy*q63~n=>)WP8)?(_Wnbr@^HMd{`>zRRr%?)@2Seo>_Q^y0uFdL zul(#FeVZHPmaQ+A{oB96b%{1Oxb`i|`>%W^mrJ}>oA`{iqSg)rO=J7l)kfqDa^zZCh4s4Z1I5{a+%X|bG3e^2M=R3J% z$xY@^sc)MI+>1$I57!GB@HSM#YiY3VpF71E%DZP#xm*5N{T z&0HEeeJ~q_r-rq+BN7mGv+8QT2J}>{0nOggK+M>zPP-DKWJgkP{Uso37jnJZsAbr} z|B^<^>{#{-UA2DgrT7=qFYB>?%wmGex9F6I(FB|PSl`PD8nh0$17b=i8arz+#%Uxc z1=Bir0FP9?9bw-A3^{^2IQe;&9=n`EW-h)YE_YYDTHSLGEY(*(W| zLfqaa5Wbm9W5THLa2$Pd*v)Nz}dp480j=CZvP(UBj z%9T~2_DZUmCi$N|)cRti*B9(+w>Pw(Ka=DWbWMDxeZ(2fkY#r|MD$qB^Ae-ZJHAxQ z>*l&1OjppH33RV~6EWVXiS7~BAmHl6#&x#qgA+45IwwlsJ zXhlBuTbhf$%tJ2@inRcv>!yb;?!oHDva(UzwG`9i$ImXw4u?8Qemyzq^A;{-{Dk4@ zJ&o z$^h>TEDU;81>H*UzqGBj(y~JIin!@ZM6!?6^#OU6Ou9F_y!oR#K^Y;jbK#e6y+-}c z$f)}xohX};>Are}QM}~tT7#f7B5FVcYqRj`sWBxoE{dT<6L`~dYUvq?ac;qX6i2D| z21!=6)1$v!RcSVY^+oM*$f7uI_pHM-!9fb(q+>+u@$$p9I)E*wOfuZ@uUtAY+3a{} z$I-HzbWebZ$=ltV{;r>$p(SguOPhxF0a-dXA@`*~^*Q~0vJq4H8g~u{!hhF)uvB`y zEvj}`*Xg@wiP^k~QD0q`;NPdUmp@HwG2wBOr?1E3`v(hgQ6}JTboQmOM z;|+i4ZH3u-+xEQk)-(xlRbFyns$EGC6Z`7iEy};wZBqAF66kF}3oh_6Z}8s!5nRRM zc*(Y%?mo|X>2>%G_})7m))g)mg6Y1c3(DN7YH-?rIJRvK{({tsE0pp?F@~2GS##<= z8W)2dJvaO3;MT@9;U*&kiyCGOd1pCexCUu;82_3I>$1JON^S>*%E9Y}%I<50%6BjY z0^bXjD|WYjuTvgm4pO0gyy~Et8Cy`~hWu)ayjs_b=Wqi-KZooOM7u9zgK{tO zV2NjW=O#nmzo?d`*am%^Q5go#&=x^Mo_3uiZ)bV+jtvJ~cpmtlDKPX+s`xyTcD`(s zJo7s@XMT5{VI`;lZaoacSHlgQnK7PuSAF^H?)NRKt%!lfOB}l@s35O~X-S_T2zkfU z#P33&mp=SLdbx8IlP^;qsk2pK#AC0h2`&qW)SbgWG$c_bw?#RFU} 
zmsqanIxw|y!vCKNmc=qceJ;0WNIm|OAlLdcz$G-w2g$!@$UwJVF&L{=GUPK|R&)QZ zq_FI2D!?2}Nh*ZuyBArFgGw1t$yPK2saS7(2I1Ta6!+u4RZL)VRgUgj&1LPQd5$Ju z2D|#T1nl12JIQ#-9F>H4uIJ5VKYLr7eb>FMvKQwWeai2!%-bHN@3cC zIYIaWw74V8D!9hT1vs}`^STIjp7!}OG2UIJpo=^+IxsKfgMe|_J@H(afruh7|ARUE zOoQ@t@=b&n<>dZ>kupbV||9Nq!vIvRg7L!(nKLGefh zEh6l0{+Y}6@MznF?l}pm*iL1GPKDXp+pjR^du89GDjPUE|0@*BM97%kI4&NMcFe}N zjFi!mW3Mga+zPJv{9?!uSKIwp919Usf=7zqK#TTRA=jRk9@9^udO@8VaD%rQf&H@*U~>+ ztL1=I$n!X&ioKiMP8n6PW;Bn{xYL6xsV||N6VFgjQJ)q;&z*fzRW%(3VGjp06oe<8 z+bk4}6>foagc3rEDH&s3;mei2=bkUM3+C{rA z4AE|cSs44lX%yHe*fZ!FQxupdb(6V96%B=K@W~G6v)=O@JmeJ^_tuCZS?0VD3n_|l zi(|=A-~_kdfQ9ha9B&=p;8WWdxN`D_J0=yc`07l7#W~RjM)%Ve$z|?Lv!z`lyyC8c z;!-^KzKNB=_S9b+G~s|zR5l_Zh~W%!nI{uG=Vy`a`Ct&MHeJSSLWna{7yxq=kLI`k zu(Yu0E?>Pi*91@3NXI0?l`fU@5;0-|4gmqG$(%pN$|Um;Wb(j0@rVcRn&7~1x|`Jx zyPJ_3mN6y*7(0d_QW&v*oWP%21A7Yfk2<+@Kz!$>DXQUp*8{uS#lX!(WW(??3QmgY zFNcMZ$H--N>_c~*`JQw=d!wd5#fIm^IYUAT~z?+Y@>v+-a|Mb`CkrMq+W2F60pbPVs= zzF=`Mn{&8Vc?Bz;Vp)sjWKUZhv4qiU3XCje4^kSom-#j95UMBk>*ut5b>!?5KUtiL|ih*+qKC430onErN!|SSf>d(#1Ui6Y3`Tj%$~I;!TozBI z4fZJd$d5xoko6A|XzSy;16*YmPvu<_`w~cd8L_oSSe<}ps+Da;rM^2^qJU=NU>*j> zKYeq~0d>MW34A`h4*~M#A=|gg?qX0`LjxC+{=w|ArkDmc_|K}BPg`7V~aPxQF84wtYr_e}oyNntuW)m*k@gM}4!xOfhv5glxk(iPD(BU|p>kHQU!ShZ zbs%UZ4+N1zRhMnGvdrD*yl6(t2SsL5mxgTO%Oa|4+(4%x9VY-Y6ZEm9#=pvEC=le_ zX;s0{;wxQRL}Zs7+RJ-d)!s&DVn301WyN!05^(}kh_eXNwtI$J+#A#38)prz#Jpn` z*{qDG&5^x+3SqL8dPMxXAuv?E2ReePfe9>R(aYW1RK} zi}a0IeTL)u5Y&SyCyGo(X|H%Q?B`;}7$S^)7B4(NkUqI|lhR(RSyYXqsAEZ3m68n8 zZv5W1E~Wod-I>Qjxj%egr#h%}QY58>;z&}dq{7TEsm2l|k#$0{#gJq-DiVsxUP8)p zWSOW5VWuLxGh`Y2$j;bin1$J%?+iNU{C=JLe(w9P=lR=f#$~PK~t}&Sze|+9f{Yz_)`A# zRX#5^rQDyT{`pAIxTo$z9{om-cN#==N(cROEtwE_**GreLh5+yQtna$H#0~XI}_~H z>0&;0=*uj?T0ZXXciQ$-nQHWL3xE5|GYfnjlLsPpD;v2|tqW+Ctm@~;bdC-8B>2ss z78h_JxQxG!i#o~F7CZh#HUmVfmu-~l>ql0i)$g3cUuaJshAkgs;9(kI+8z$-NE?|K zwPvPrh9ncl?BmB=F#fLcet{Q%2gi0f`2=msfA0fyhJA$wGg50(hxsbRq3lX(bXtdU zyvqR#ue6nGU`J?ALO}TroDDMB>qxRL$)9I4Rgf&{jeVmC6$-@ehPlo6OVy!XhB1^97JFyJS-4gKlu+)GQn 
zz1{D9Hl=CFKFq_s*tno@ikOF67EsIf4w`H>(?BtVfUzuW7<;UXutOT)V`pmZk$bOv z?$-8rRj^aD!dmp{6QOc4#lx98uPq+)ND_*Mzf8~&9c5iH(ipi^YO;JZ1YK< zxg|Qd*(I<0UPoG)+0zj18z?J%nJyiQe;?R{)at|LBX!A7n&*G^5d^l$*BiRtKmFu1 zWWFjDq>|vRH}Ilnh51)fU2Th_<1CGO4qPgJv-6b)=af@+NJ964y1K&etvv%XnD4a9 zhzstd$1b#GwAj+o4mUOI{?+_-N6KCIiWbhKr0P_3X5c3^Yu~v*NxMDK>`SupQ?@XS z>jbW&|JC*6ZClle8@su-c%u%K^ik$Ve?ORmn$Hb zR|+k`pgGV4;Ql@@8U0ZP^s6ZNras1M`0=ta+K1&j!WyobgZ51;4#7qHH^TOec?G5Qlm(DQ#^N^VOq6GtA3!~d$<^u@lEQJ z>hXXjR%pYlkd-D^Bnk(%Y(%1Qkz+{JL^Sskd5e+S7NergrtCsyeE6|O{|y65kiCYy zN8}E!Xdmy*M&QaGM8A*{d>1M6Z;Am03*Fo(k@qDV&z)rGS*miYva5;hLqIh3*>g-i zK;laU3l3=V^Kf9;h1(LcTc%0|tm+i-Tmv1}YpP*@Gs}u^jHQes*Mq}11HMz$6A9s* zq>lweZBvC?vg|^lLjRV{1xZ}1LBo<;>`+7Cs8s_O$-kP_N~Fbg%sMVUwFE;zymZ$S zl=<2dV_s{d-0b1UI58z{o#;(fz*1@_d$UvOhRG8;7hqEOCTvlR82i z&55_LW7B7ubcX^&0)OQck_tBRE_GIwnPf(H{P)n1+5g|5A@0ZfIUvHk6QGqryOH(j zC3}Rv%?K^}%6kMT*R6**nsZPA@R4>NmDj3gVi*q#F$KvpA$bo!bJmgg0$%FvYASu>b)1Ebn14P1KmSg{n7n_7v*^Da*CBIx1} zS(SUt@EWx#3vt2bxu`1Bg|xF<6PrkB|6}odwiHtgSi_`?MYCq}hgxirP0&ve2W|$i zdkzF;isa;uz3%aAo3fbvWrHG~yX^LS98?Z5KJoDvn?2zdAz&xyq5=)20aio1z-s7M zYmrxR#)%=_zCsVqz%VW7$nYS8j#Y@rZi>GL>N41d1fj1EPJU2~2o+=EO!FrtuZybE zdZWG|)-%q4c~NK+7!FKv;Z?l_%l|q@RfCz#dNmre+5a-q1{+G1&_asF=HPE~+vcV$ zmik*1Zs>C8%bt(G@H$xxOUT_o_nAVX#RDg;uc;*6d$A#jF>qK&wR+cy8+Yb%$rMUb z2yXChyzsi{aQC&$Exjyu;NG0~GY>XX-K1IM<*FUbzY5zIW+yW7ZQ3lqF`>|=Y z(Y(4b!ja;=^XP9~vrFgeU6?gyNZ;w{G_Yfe@zacB&HjOF!HO2TCG;2vsq+D!)-`;{ zh_+~79BVfpGV)OF^Wuxco2ab0LLOwqUH{@8%A-0Hd6jq{K4j$cGGxS+4;k@AK`+64 z!3)IvMb>`l+Dz-)Lmp326xPsm+5-lahR2Fvppt;68z#lYaKz5aQbSm41TGG0@&KGC z@+_`-0}1k8VnP zrS-b4!vgNA_ZDRoXg)-OfRrG(u&NU9iFcvP#&dL zx>sJX4!>YlUQE3nAp^_p8MlHm6-2ZSXVT#`2tqq%4%(}ln{*a*QZm6=$<)>UfnI%#NdD@#LR^Px> zn8oi|Y~Mc*-T{+fvqhfk&yU>k-kM-AwCDNt#I-u|w7#0& zix*CU-MBRZy?{W%^6j^J$h)&M^It$nlmUc87+B$lw(|4R&A(0L@K33ZYRjUy?ik@oya)21rP>oL`%lY7aU*~@* zi%AW;&bC>inVFGT`D1Edf(AcfgN82JjCP+(bL*Z^H%G0YEFGQV|!{ljvjV)5^=KgEy1^<#%|U>Joy=-RhCP@Et*8A=q0g| zJ8Z@IFp@;78sT3-j50%kP)_#o9)-sZB)q86=dxj_z*XNIlym8;OL}quC29*$qS<1I 
z{|$#^u*9Px&%1B*wA-#@pU&UPzi1+6G?;AU8;P_S9CJPZeL#vYGfwZNAkV_`U1^*r zD&?43;lK-|MbKhBw9;a2TqTP}79LZmFq}V0>vA9z(j*0X`Djt?Wwhwa{Qxa`5THe$ z7{u_=q9MyMJB(MeKd;4V*R|uz`RBZpUAHTRzM8Uwi)8scjF9T7%nmI_4`Qa~Is}VH z{K*!V_GMNo_Wl}q2cd1Wx+v-ie;-$-}geFo+g=|->FCA+M{YM^55rh&ZNZA!3{Z9`c412 zEPCRH%c56)v@D8{rD34W?>&>MpGf~|Ab4q%w2c_1E}$2%UGOPHNQ#?vJQGZ?VyOET z0~Ct~EtoY2Ovw(9`aDhLJJpy+F|C2`tBTDOBA*O$)TZ!ujopdDyj7I=?FI3C5=fM> zu#(M0Mup|&<&9Hbci(9j9+Hc`=HO7IzO1rtUtB4*+E^V+@1F{jiMSJHtlg8*GNjGD zPIE~gV3^W(0iZ5|hsJBGZ?QnD44wRay;r&!2+or@Zf+zAQfd>-)*0U}5c zpi@HH!fJ?L=5fy&2;2dNP);)F@I1`rbh7KUB}X;a&xxl!QQ$q|3fUV}ZZtU|>Owgy zzRO0wPW#GfC*Mca3mvyyq_0tfW&BspONk!`za0(^$jgR;t^5tF;tAccoGTcP9Ybcz z1+_0JfE?bk^jt{@<^g3lSQ{B)tbdJwc$q6!isE$6ip>gvN)7iQ@J~v?UtB}H!qaJ* zcAN=>zb?u99ZSCe07asrQ6FHi{58+#KLl?oE;=NM)UkA)G8G&IqHU5<7Q{)DLVm`O zc{#i1i~d^Uj*DiNI1XM4$ggiwydazpVzUwd-LQR~tT?ChCN|&uoAd{81NEan3aP>R zr*@4h%x7jjvWAB|Y*`)15m;T!;d2)c^0LGodW3CIY_s?Ki!*Ev_;;aL4zlUHmP3cwuzq7;Jb`-UTs z1{{pP>!?77s2OO7^uSSH9{vze7fKR%)P=>rP!|@a{znK#-L@&Pw&+M_Sl64Xk4dN8 znKHaNipV=thjnPeX-BfJZ~>mFs)c+X`JA0>eQ5eEdv74jYTBi*@*06X@FcZ)$DSJPjRCG$Q zblZ_7tm9drBz#WPKo=srWyh@CP*T^Y^y-ENJ5g4I;JKkSv9jF(czeFzG?j& zFmuQ&IytfXmWxhJw{q*z{I*Yk=q#{*ZeG_pcA4U<5hN|r^)cq*sJ82_4Zf6Op0v8A zzxfUrEH{wf63?Ikexvqu{KgKAy!!Q~20XZ)30i;Q>$0?6=!grJkYy)x2n+* z%W8=t^10BsuHqJC(a9&HtzWS>ko)I14O&TyBQ4^lB%FC~5z_OUPaw{o7E6hc1d5I} z@`8)a7u4jL9*HH#B;=zZ$dmp-?rWrEH^GsT_Lj?HTS)KCck%0sTv@E_0CMc7U?z{> zaBZs|%<}A3yP+_ucrPFy3U~ux12R=z=&nz(U4@cOd#Mum?;m^w#BAqf)k5^LG!aak;HQ~|-dA9Nfc;JJh!bpa}qrPYCA!RwW1A<=jDD&T`Ztz|&PZS;bZcJ)~KqcnIBv^N%tddz;!Hdd!d{QgcRM zNm`C;hBu91YfI-7Va8WUiIZN{21RCQhkzj}j|mAMTq-9PL)LIk0kY2@n~l7Gdx-MI zMqZn^n(ZT`FG!?w4#U{Wg5ER`rxW@1RjsC+@{HZ#BfCkRK$rTr)yh@C!>3iigJ{VL z;2~%oLk={goW0kHVzdjL5OOm$iP+pX7mD<5!RzT^1r9_23yM0ckx}%$8)Yp0y&H8% z%6<1zvt>bO<=bhQ$f0U#QU8@>L9aWg+Fs{9?@VU8(q??deVUJB7&v)sK{ds*#H|rr zHRl_y3bU6l_M`0$0OcVRP#(NiC=ZBL%7a-z*2#&$=lDWAq%2CjLBL%Z6rf~v_T5E+ z0+hCKNr9M|BUC{$W!>eJX2XQ>ydW_isZry}dznBV!H4z^TcthRK1HEm#dUpeO|P=r 
zoV<4-HGKwU74O^>N!&)kMiAP*mG9d9&CO(&x03C1%FoRE#IfT#S2HlqWSj)t`^9yD z8nZUp&*MF~M2b-TBB1KFhk3BbvVA7Q24-+pL$6of14=cknWxX1`1)+&yFU9#06oOh zXA<`E@O0|GzJ^!p-v;`ur7J9yug|sveb&69&t9+Uvk>X(Zf1FCv*``fcgSg%Ede{Z zI|VPe_`YHnK8{NixzNA`;0O3$!4Cz5@8qs5;6RS)73v90J52@!c2+{e>(nXcx&uPuZi>r5S07Ta-|FMn(Nr% z$9tiYmg_jME1I;CfXZdNB-XWu81k$?pOUbpwwnW|>>`3x}s%U`dg_g)dEHF5N( zZ>K4f5gm0jgbH*!jg)k?ds|2ETl@mN1D9p;GjB3d-AFz!BqNq`>w>)4(Uu)`PKwE- zY!3})9`;^JO5IevcSfqtDIpqXTKZ^X=9`1hE+XDSx)@6`^m-{~e(BA6DQ@<0y`BTu znSb^5S8$Jye3B@=SkF$5pJnB)uhJjlv=qL0!j?PkVnI1|Pu063>x0|AhK@rO{Z)n? z{oTdF1V|+k!!p{rkvD_H^TKpReP0@)FTX4lO;NLk7~}_~K0G5jeM0CT$A`qzw+5O@ z7@4F5uzC|KM!GrI=`$};<+qBAz3Nkmi0K`i7EV(r*dD6G)&G5aNWous zd40%jP+s^L6w{y8`v##FWRhK`L4Ryb{ODKf z&mmHke_=3g-ue_PwgQQ8r7uGwelS4v14u;Be?M1Ai7p)oeCXX?@4a=+fF!T+bdIsT zj?pPJqu1yWkRj-P9!x3n{t1`-v|@NI%eT{DmHapyH;Om`e)droSzeoqdl~?h^=dKo zpC22xB#V<(w_9_Yp0iFGHgYlZU=0Dtq3jw^zhUt5YPDR`dDB5d+lh8k80y)Uv1|nU zOW05aZRZhkZc}R#k%0Viay?ZEw|;Sg_t;~+f`2#=Nc;ys*wdT{bK47JVjBHsjvDGrV3=z_DU+qa{Sl%Vx8NR?;(+uvWHAMo75^cjvwf2Z!%H z^sEN4wO|~6n$sB|E2bLhBg|q2Ap^ObMy=|bXzeId>%3PtI|En>u8WQ!Kr}E^I?dO? 
z!N7FlU+AoiMAa%Uf7aChtTBPt925&Db!t6-Us!wQ+v0)f-v>zn3{!@Sy;F&JA;Ag0Bp89zB=z@0+` zjh`i}w$2UDHTf9$ZcHY}3t5h-ao_37a8D0UBc{y=&W>B&{9)gN9Zq{SL{jZ~k=BTv zN8n)n5UZdv4u+%2Ee37PSSYpMf|wbQf~HzL#xQ^|e={y>v=m&|P$`?Ssf6X_m?jl0 zi3UqWF3d_sUB?ItKYBDeu^w}H9CN-pmez39tmhdrRRxQ>S&Non&Tig#Njo2GXrE*s zf+#Yemx|zanE55c5mnhM7GMV~Sd2lhX!q2LeS(2I9^=R%k?naD?D0sq2;vqT#oVJfI zSv8|wHuh52!hOcCPp|E%RE)?hZ5(e)ceP*^aM*A z+tr&rje439qmd@uT|40WHsr``t)7LMar)xa+4Lo7#rqRyVP4gT1O)yys0b#R&b+z8 z5VA&Em;SapB$qwmg+@i(yC$jvWlK*uZrOYjziD(E?|FoVZg`4*Wno~#wC!GcRVTad z__B+aaMy9w5s?WT3^!5zVkqTOLdi}RY;F<&8JqE7czV)qz6ExwB=3mVL*EhQko%ra z(C{Sm#GB_#$CHkgnCmqk@9q~Y+$MU-C(o{f6?@_c_RY>S)VI5fj;3(LzdjfB!(upP z?6jrdzeHcMCLy;N-qmreb*Vg;%QfN6rlf%qt;r_^Td7*~@FajR2)qypiF-NnDihQ# zdGyF}fQ&5+y6)9eLQV7da?3N#GhyRPLBAG0^sr|ho9Ma$Yrv* aYh@?q(Q>E0Yz99N&^c?MU7&UC-v0qgMY%Qr literal 0 HcmV?d00001 diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 4090eca..e8edc72 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1<<8; // feel free to change the size of array +const int SIZE = 1<<12; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index de2bb61..5aaff9a 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -4,7 +4,7 @@ #include "efficient.h" /*! 
Block size used for CUDA kernel launch*/ -#define blockSize 1024 +#define blockSize 128 namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; diff --git a/Project2-Stream-Compaction/stream_compaction/naive.cu b/Project2-Stream-Compaction/stream_compaction/naive.cu index 5434774..c81bbe2 100644 --- a/Project2-Stream-Compaction/stream_compaction/naive.cu +++ b/Project2-Stream-Compaction/stream_compaction/naive.cu @@ -4,7 +4,7 @@ #include "naive.h" /*! Block size used for CUDA kernel launch*/ -#define blockSize 512 +#define blockSize 128 int *dev_A; int *dev_B; From a3d47db28a921ecdb5981ef5bf512dd21eeb94a7 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:13:45 -0700 Subject: [PATCH 19/48] Update README.md --- Project2-Stream-Compaction/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 410445a..92cb7cf 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -8,6 +8,7 @@ ________________________________________________________________________________ ![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) ____________________________________________________________________________________ ## Performance Analysis +### Blocksize vs Runtime (SIZE=1<<12) ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? 
Is it different for each implementation?** From 7563dbb677acb9e3718eb46a536810f4a68ba701 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 19:20:28 -0700 Subject: [PATCH 20/48] early exits added --- Project2-Stream-Compaction/img/earlyexits.PNG | Bin 0 -> 773 bytes Project2-Stream-Compaction/src/main.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 Project2-Stream-Compaction/img/earlyexits.PNG diff --git a/Project2-Stream-Compaction/img/earlyexits.PNG b/Project2-Stream-Compaction/img/earlyexits.PNG new file mode 100644 index 0000000000000000000000000000000000000000..bd189ba8b932d2d21fc720f5c11b4b868dff3112 GIT binary patch literal 773 zcmV+g1N!`lP)Px#1ZP1_K>z@;j|==^1poj532;bRa{vGi!vFvd!vV){sAK>D0-8xgK~!i%?Up@n z(?A%gp|=oxN0J#tg)0Gy=qN z9O(6WYRs5{7!F5(pqy^Er^bvKupEQYljGe$6y1PV_UwPMuX7BiM9u{S(Gr%+O`_Xd zma|91U@!z^GI5~O>8f#x8^}uvz^$6^ihZ49I9tgH!t8Ad{o?u^_(?W>>mmr;b%z^7 z>0Y{?&r9NSb9u{ziGF_w$aw5PyWLgeltd1l%S%EbYVO~yFRZxUpFxO{=swz$fcwK) zoG1H}yz0$*{E&27fWDlvdD(jM*PaqzZ1Gvuel=f9OZQsF?&Q!pU6-G-h5VH1?YC3X zZsZg{Q^kBdcNg;iiJD_Rla{W_e7Nx9{T?hLBGv+=oPit+q@0PA(^2D;^fK?67uS<# z$)^j6pZny;L==YEK0hp3_v1GvUSGIbzXkkT4s%dgj=_l2(>@^Q=VNHK+G@<0fjByP z0*sl6X0r(y#~_4Y4oKoc&CBF`zr3ss=I8TyZMeT0%(5JUN&T2*IRumXG0SoYCiP>M zc?ztIp03F z0RDWGTua_bCQ{JjoFJXU5!zn|oBA=UC+GJR;MX(BQ#pPFtDg_ydiG+g!x7qF2b=ma zTU!oFpsap8$cfQC9HISnu&E!jwdGL0PQ48=h|b|iReu#W^<#D`IaTN!j&Of9m}NNx zlln2satJ2%WA #include "testing_helpers.hpp" -const int SIZE = 1<<12; // feel free to change the size of array +const int SIZE = 1<<8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; From 9ca5d3d188bd2876dc9f261f90ec6d6ab59ee012 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:35:26 -0700 Subject: [PATCH 21/48] Update README.md --- Project2-Stream-Compaction/README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Project2-Stream-Compaction/README.md 
b/Project2-Stream-Compaction/README.md index 92cb7cf..cf81b63 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -12,10 +12,19 @@ ________________________________________________________________________________ ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** -Main Point: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across . +Main Point: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across all the GPU Implementations. -Copying back and forth (Device to Device & Device to Host) can seriously increase the execution time of the program. The CPU Implementation takes wins likely because of this. The Work Efficient scan is the faster of the GPU scans, but still requires that we copy. +Copying back and forth (Device to Device & Device to Host) can seriously increase the execution time of the program. +* The CPU Implementation takes wins likely because of no need for copying memory. It can just pass the pointer arround This takes *O(n) +* The Naive Implementation does *O(nlogn)* computations (many of them needless in the event of an non-power-of-2 array) and runs slow than the CPU +The Work Efficient scan is the faster of the GPU scans, but still requires that we copy. While is it *O(n)*, many of the threads in a warp don't exit early when they should. Even worse is that unlike CPU, this still has 2 cudaMemcpy's. +## Extra Credit Features +**Why is my GPU implementation slower than my CPU implementation?** +Apart from the calls to cudaMemcpy, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. 
This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still slower +This picture illustrates an early exiting strategy on the upsweep. + ![](img/earlyexit.png) + ## Output ```bash **************** From e3123a4de392f6456cd5edcc37ef79e1759c72c0 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:36:30 -0700 Subject: [PATCH 22/48] Update README.md --- Project2-Stream-Compaction/README.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index cf81b63..3db06ff 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -12,7 +12,8 @@ ________________________________________________________________________________ ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** -Main Point: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across all the GPU Implementations. + +Main Learnong: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across all the GPU Implementations. Copying back and forth (Device to Device & Device to Host) can seriously increase the execution time of the program. * The CPU Implementation takes wins likely because of no need for copying memory. It can just pass the pointer arround This takes *O(n) @@ -21,8 +22,9 @@ The Work Efficient scan is the faster of the GPU scans, but still requires that ## Extra Credit Features **Why is my GPU implementation slower than my CPU implementation?** -Apart from the calls to cudaMemcpy, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. 
I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still slower -This picture illustrates an early exiting strategy on the upsweep. +Apart from the calls to cudaMemcpy, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. + +This picture illustrates an early termination strategy on the upsweep. ![](img/earlyexit.png) ## Output From 5ec8aa4d7448446530309ace97e37e0edef89ad1 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:36:55 -0700 Subject: [PATCH 23/48] Update README.md --- Project2-Stream-Compaction/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 3db06ff..6498e7e 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -13,7 +13,7 @@ ________________________________________________________________________________ ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** -Main Learnong: I actually removed 2 calls to cudaMemcpy and saw the execution time get halved across all the GPU Implementations. +Main Learning: I actually removed 2 calls to ```cudaMemcpy``` and saw the execution time get halved across all the GPU Implementations. 
Copying back and forth (Device to Device & Device to Host) can seriously increase the execution time of the program. * The CPU Implementation takes wins likely because of no need for copying memory. It can just pass the pointer arround This takes *O(n) @@ -22,7 +22,8 @@ The Work Efficient scan is the faster of the GPU scans, but still requires that ## Extra Credit Features **Why is my GPU implementation slower than my CPU implementation?** -Apart from the calls to cudaMemcpy, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. + +Apart from the calls to ```cudaMemcpy```, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. This picture illustrates an early termination strategy on the upsweep. 
![](img/earlyexit.png) From 9092dfa99fcb401f74def5c7c8a1f3a01e5a534d Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 19:42:00 -0700 Subject: [PATCH 24/48] added performance images --- .../img/runtimevsblocksize.png | Bin 0 -> 10899 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Project2-Stream-Compaction/img/runtimevsblocksize.png diff --git a/Project2-Stream-Compaction/img/runtimevsblocksize.png b/Project2-Stream-Compaction/img/runtimevsblocksize.png new file mode 100644 index 0000000000000000000000000000000000000000..1c280b551ae44aa8799d447904f97f28cd657d69 GIT binary patch literal 10899 zcmbt)2|SeR+rN>mBU@#OkepK1WJ!cdI2|DgW1mQ3i0qBEl(MBxS;|(nu?)t(8^ut> z*vGEy`_5pP`QMMu`a9>mpa1)N`^0D5^E~%`UDtiB-|Gs#uA|BF1Md$sG&C$%f4Xvm zhK3eSLj!YVVgM^6R~uUxw6)i*!5|MkuBH|LyVj>ufHIx53%#@yX9{1ZLpFc4*B+Cd{KleH#x^ef)n`dY5UJ2B*mX8kjO1n_{uJo8y&*-Vf zv`6`(vdt}IU&3xnc5Zn&DwU6SYqzD0F>}Vyz03FBFv)U}Jk;-akcOr?GhCa7<`@D- z!vqMU<=XoR2%z6v`j=(s$zPv_p8W0UzbqTfjYnJRL|dTjdTml5i)}fIr|hJ;-`4oFtt#2L zeDt|e`|H#C?NJh@9_7270sM!tA<<$vrN^sCL*5b;*kp5TG<^lp!rKvTk4;(*cRc*f zS+fUwOO5w}m-Aa$yLO#J-~~(SHnHpDEhIBMLY3kbzeqL0Z5tGkL=iA1xUt&OfGeTP z%VBqW-R|4RC(|4KPK55V*_;ZKoqg5I^?bbqQ5JZ1??iKKhd-6%k7g9~aei7I!ENay zZT?_9a{867id`g3%sw_$AklrcJGIPpGEGP==yrtWPmbX*F_gY_RZnql^|t-?=4eF0 z+g#&c&cz3~bIx{P$F{mTylITKF>0&k6{y>$_TAQW~ zU#(Yjj3Ae)|3K;eUY{7R!M0>gbU5F+JF}HbADN98Kl$$x{cT)rR*0f3Cn8j~?{04q z{1ck9f{9@X>6>3zI9H$L;3(U?q;n14VYdpVjsz!id^?>WSgVKc&+bts4ZP|)?Z4Z1 zVXalIcf)5kyTtwiS5?$9d`OtsLu%~_h-Bt2J_c=g!6?(L21TAPDOevZik7lW7$kj>A>5wDlO2BcYAUfc*V-AF7!ZNo zO@)c^cEu27{3$zb{f;GFOxocr@U}CWwU&L@E6zebOWurdzxHzcOf9sX?CIqFryC3 zsy=~|0EW;#7QjhTuL)rgt$yEddnu;JY(W2@J>PM0ZtRLh(RtU2+sg87KdI|pm+fA5 zT2rVaguIYqS96rafSFDFRN!LwlG982Guk$i# zk#Z{*tCcGFZ~3<+sT+R^MkEl}!UT%i%ZFT=BzlX(+=a)+-ixUkCwnD^tXnZ%KpnBR zJY_VwS()dzCW=22+tzd#Rxohpj#`jlveB9D;N9%=q^mR@j{-Qu=l8K)xSsv=T?Q;m zR#qJmN**nGdk{OaA(mTt!?8*)S)(h4aOk8?T+E=VV_)HA=VwD9JR72(c7Z%Op$tXU zLm^YJ5TeAw@~PbcZQqFHg4KjjCYsb}+EXGbMvu#bzA%Oe6?pBcQnv_C$VuEH3*IZA 
zMPhdZc=@o$d#=Dw`^}H+BIo;xo(QBcdE$>3YOujEeD9SlgfCm0(I&|b)IHvYFJ_<< zGK+QblMyet30X)1bGihcq%cQ4UiM@UJXy~dhFlr@+&e0LJ3%$KY~w18iC&GU!0|8! z3Dm_Er!V6ZtFk8$oNTZsUiStBtJt+!;Hj=rE8j3v(xMyNv$?jU1{_{ld0nDyRWG{I z_C&F(ki z59GWggnH$-8av#{Q!O@x8+tGqKS$+QYx?Z%nZzw!&Gb5Gqi>9S(F^G?~Do-2vmFdec7`AsphS;wFVbYGB;+^Gi#3ryBbkPhb9)SPA=^6yWrQ)d}DXN zj8I+*Ys#NTM7CTFPKYj5op|2GX|a;-)V5JW zo%;k!y5dcLy2N@q43AY9wE-z-__#c#gtJO(i<~Qgz+a`8uH&hVOvSM^7wOWrHtsX@ zsc4zZuIs&rw_4C1uv9dANeXeu4KUb#%fce{K<((E))I9`q0I9`)_Ds0l0lJH_jLRQ z)2Ge{3)J3_9^;yoO`~vFB`r%C&8Ag81*eW&z(-_5q zm_|b{J#Jz$#dC*-CklSZm3511Bs7K%75C8^9>}YoZ6KVUZ+2|)Js4mpfL(DKc%Wu7 zaB8epG4;@+)v^=W$_RC@u!P4k0o4*{++>x{_(_d(wl$($tF9|FH&E`oK0QV6iMK-# z9Q{HQzo-WUpJ{u+9NE1t@U}fLlCk)NR#Ybwe9PvteQaQ)>bVx#FkjKj*VY;%?~yjm z+L~T7#O9a*}mGqf*ikuf!wKT#~`-e?tP;$6LM6W&DrDPITz< z%O#CHxtCg%|l2{F1219bt6?*dc*~`!}iau$2*$(oU~g;PDWd#BZGzN zC;CKSJy&TBKjq%xcgRyH`iA-W1;`sI1)9 z)E1*EeQ0)qI3Y?@Y6qdWHGs2UDTtKBcnXyo5~&n?JZRHu_}Q*l0-szc;>NF1PF5A9x>j2yJ`+gu%I;xUraxAEb&iy}+8Sc1WPyGU+j9{f&NWv3 z;~&zC%jEjCWG6UIt{~ws8rp6hvBVU9SAUO`sV89xoQG0i7i+d}#+-M()9rn8XYz3| znx`-HYUlE#5lFIU^@Vk))LKCbR?W}j0CWBD5VA2bA4UUX246DMT&$s`VaL+!{m_Z; zE&a$nqAwjvqSjAec)}Yz}$)9W}66bt*yFI%4wcpCC#1s}jdD*-DV-_Ge zcH8{+luSa`Mb`Z+rRJO)mb(S~{V{SQx!?JP+9F%cO%84qFQhaalt=y+J*T~R%GRWE-BKHWW-tNl1LyAOg z^xAf?`7H%wk4{E-3=#$GzyU3$bU`4iF1wONM)8=|VYR;~U{ zVKUCO{R5+5+><&`hlqI3MVa3F9MnNsx1`GPnJCR-3fezDVK#NF$;^J|`y3WMoReO>LJd2v@3Sz2lyh6>L;%|iRZQyA2?^T zN45!HZ_^ZvSJ_#rH5A1Dtl9u7n*6WqRE_IlFTC2Xir6SCs0AJP{LyHw1#hiN?j_hC zNGwVVFfiqJDxXes!*d$lG!>aCw!()fmT1%EB`ABRfh;dYJ(O$zcNxS1XXaQ{=xMQh7uhH<{ zl*NH^_t_hhE%C87XQ>+p;3dO;-n?fJ7r%K71#%{$a~xfF*NNN{_P-t%2170klx1i} z2u#1RO~~-@KahwwOB?{HY5q`xi|ZQ`wVKR%*ViGfc~lA+ovIa`#wg(RuFu(+?sL2} zLV0(sV>(Qx<C(p#A0+v(P_i7a&Ru1YI#NizH&~u( zWa%r%EFi4;{8|IQr&-qAhs;^mx+TjvA@29WkMyriw0LKagjGxxl}#pi->|WNqKX|TTHSM~pny%S+UV1=0Uj_bt_Vp?d(N+v zCT(4xGTs!8wx|MLJtRbtsJi(zup_~HA>%Bl_YGP$mq+zU3xmuhwk>gQfNkEG-3CSN z{c_+3=*5Av%xnhry8dvrFLQ~t7gjq~J|jOrzrGwQWfixlk~;>xV#nDrGlo 
zkXY>4U+B=AZ%;z+55J`D@KXca;WqA%K2#S+}QL%`h#wm*Gp^fZTL% zDvwLNqI+YD>|qc!{z1n7avT4$3~M_1{}X%u9FqQrEC0ts-l;pr$w_3FkEyn|Co~j( zT3L?V*Pgbu*wYGto;gLs1b>~-Cs`ve=r4*LfFBakxdt>t8`LXWK=4;*iMdi0L!Vv> z>jTN@lQ-t|aelk)z}9av&a}3F6Se%F^^mCnYSaFRagWi5c9y*$H==>CCVLRfaBP4Bbl8DX-eyiRNg<)}Lv+Cq7%+ zdS@;cMmMLzfq` z#5(nXwl&b;#|absi{YuZWM*3q)}sp8der@>TV{>7032Wkq%_^GC8ez?@E~OdplZhu zkurAeDV+cdo7*<5@+P&Q(7&qS0T7Y2`)m_^+d<%ZSxu^1Or>Y$`py8?F~okrFc%Y? zmJ93xF~7G2KFfAt)MGmWyt`9D8dLn~?A^|CS*K|(7&Az3%p0~mZx+EzMOlsa-m3_U zVl~=24&hwThR_}`+9-&shXE)4=ha^i{DX!+-rEn~BWdr1|85n~6YvKCe>nq?^_|uK zdKGZ@cP78@23Ti;H#_}<7quZ)ER^rXz>bBA#RMIYBdxV4EvB=6XE4&F!ozWFMctLjeDK12P}1_)P%1rNQM(WTDY@gfjz^rI{)Tn7gwXi zjeqd~fK;LTtRd&+nuAziELOEo*>r)Ik{EdY?E}C^Z&%%PKceK}pg5DBe3KETRYS{; z{h3AJg2MXZFs{!RXuoz-ltkA0d1h|OFB4YUF=sCQek4GzZU967p=4D6WNexFvxL}l zQv5ETgrl8sOY2b7rp*Q z*6zkkcebfLh$u54*m_I{nW1&!^JQ2VS$JQw@JC4^yD@q~bmEXE(*llxT%f=Si4jE9 z|7I1)jOM%OF78Pggyr9M0|KQJ|39rlRQ^F0IOsyT30A z?Ikr)0jX`AP&)dT@8;{|Ti!+)ny&~-7>-8d9(^I-?5~-9X-~*~d3PlG;ur=Ba`5&S zzPpmBIos?D)VMtwiWv4}k9?{~eV+N3m1sM+E)%c?vuru9d66Mx~>%)#@xK6gXt7x)u zp|ETA=|v{Vj9P4l{8l*zAf~l{4>I*OJm1d-*9{^Y{?@EVOB0g%+N>AT~pg* zMhuZ52f3}gkfL-?{Q$R@Ks?kif|v{k`tuMdi2)L55HPSk4dOM2X=T%kw@%qZ(gyEv zVJ{|U{r1d7AX~6ScAMk7@Pf2dgXkU2(5}rmF)fTv~4S?lbxyJ@3Ln1Md;-LGesfJrCjFBlH(7qZOkS&{gmsBAd|)qOk}N67btj%pgW8uq(0eZMsL{`QP`VJ^F0# zCjY)Itm%&Z9*GqIv2e%7fVc=oOEdPGwkXicdQgtcUj?HH+lPuWcyR5=WHE~ z-L(4FJ`PEcLbt3Y3)gG){zSi_K2*D{&-a%&D+Mw37uvlN6%$kCs21Eb5f?o z4p*TzY9h!4Y&B1h_777$ve%)eOUJFJ{M}ZnrjB&tyr*-*{HNYOP2;xk5~_gD7TR^R zN8@(JW$(7Wvk!OkdlzAuuev*}JMBc7aDqB_beQ;K05zw84l5r&_U;j)xu>}zX~31a zWU+Mg`C6)!Z${tx?ub*4fbUvcIBF2U_Yh8A-U4ObLjVJr^Q_vW_Sb^iBcFuk$OWLy zd~||8Wy^lu>2p9Tka0NHWxR3i4Z-ImIj|RR;XPl}iPi;3q!=mN$ybbA@q*THmJJS& zyv251fYzDp>u!CoR`7zKd>No{1z7VwM524Qv0dFWjwh2>a&0NR9EA}IBhS`A2UR8B zccZu0Pknw5+505)^zc3ES4P!OewIm$=c%!oMrQZb%jfc&@~NsTpMu+zS@Fl+}PD zP>IRjAE}!|P#eW^nczy7YC^sZ9n!s!*ZPc;5tDxF=oM~p_zQDgNy$EPxRXz^M$Pu9 zAT>ruaXRHA(w|f@yaz^Ze+^{I_x<+%sX!y@vt;>H(i#ATTkCVxWD$wvfyw;q2aC2w 
zxN$K<-iuXh5!B6yCl?Q|qh>PD?Et3e0QVI+D2PLf zf6$ZqT+E2+@i;f&thQ0%^Z7*dZOg|^^I{z7_RMh7pr@sL*zRYM#1w${o|^~r3U(WF zz4pS0HXg9xxsjbB6z*1CSNS3a%~@r1sm6X6CUz}RrHcIpXyuwg{rnP@(c{&ihmA5E z;d6ZaNMT8XpeEZb9es-O_Oc}FT*Z8`DOX*F`IpoOJIf6P328CJgI28FsnHg1jI(;F=cC1xOT z&(^D#o=6=Q2z?=_>;`(m;ClQeHIA>8M=glCgF?a8jXYPN>k;?KTdPZ>Ch6r!|Fy@d zE+eKoWdfKR_iLm`;R->At>D4k9N3Tklu7?PRlIm;0%C-+xvDRXVfU|*Rb7f=-Kw+H ze)j%@_d*%^dViA?uF@fpUsGchqX6E3+UaWglF`^?*eo+#h<8HaD>r#L!ryG; z*q7(baJ^cEXZ7rt1+e#kwB0)OG;L~CgiacPmc4-2*N1Ztp!P}}%aI^Z8kL!_F|!=1 zVwPr`&a&LKM}am#J{h15al4ZEffO*+lEz*aQsT5mr_R>Bv$a~FQBe9a4COxEeuB>< zEMa={myUxlFWmCx;k&jUEE;0oo`RnUMdRCl_1 zfLyiSZEDQc4d8Dxc?6!qL74B%(7$!on2#?pU2l2NZcNk2rh+sDGlipx@>#`7LEO_%i(* z7$72`vPXDafMTOTU9JVi1hMM*`g4N{!lzILR_)$9k0LD?vvLAB8RwxU10OYl_{?(5 z<&Z<~GE8iSm$Bml0X2hRhUZI$6P~;s34mPA9ZQrIIl-mzbT9XtZCb-;0x9W6WdR2Fwjl5G z4hS|urxM-cG@^_*i*I)aPIQJDE`3A=H4Rc4YZ1Hm;0Zs=qXoj7&>+N<08ZtbdVx%B z91BvVUmty>q=S3>ee~*I2DScW`7eW7|1`g~G%XM2UOGWU`?vyKUcB?i{dNGh?;U-0 zQu}91%1U(X&UP-2f>NA%7ii+(3ySQ!MKC15^m^w3wt;4rEIk(Q)_zs#M+hgXK}#aW z-$RAI&Ax8ohzq~4s_rZmHVAm9bm%BaQ~?O?Nkjc}{#e%YV=oE;)`r&CRh9#&VWr95 zbj-z*#phBK2 zbQ%8{Pe&*I&Yc0%*w~0+%(BG!$$?Hf-$D`5!uX;|k-Y(M>wd3UM1mfyPNZ(k_oD$} z;@R-LnX;@VmVDr86 zrcPCI071-J+5`o12XH!3@(w-k-T}C+Fd-n4ga9a_53&Ch#{kAxH#n3x5XZ+KHfzK};Pu&k0A~Cwgb&yMqWgsrbSVruu`cH2Zda}A!!o$A&^VX9GQ7?8urhp^x9t!}VQeE= z;L9ceQ`d$PgmTaaLt&PSd1XyPAC$f%c#Q-*uoB}v zhu$c9E|^I?h7_2U4US*U9hs~2sn(x1eth2_%FEVu&X2#I+bWAO;*7u>KVF7^$rhb| z8IphoE<%JaBrJHEqx5(ozHXc8C;Ti46yx=0z6OgV#3*loOcM<{Q#OmjC>*HyGh@IV u{Vzu_|76+F%YWVgvTooSYS5pkF!)MRklm;AabVb#=Bm2Rm7L3nNB<83q2ba1 literal 0 HcmV?d00001 From 14ad554e11f126d0a03fdf4c47062d81efe4adf3 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:42:18 -0700 Subject: [PATCH 25/48] Update README.md --- Project2-Stream-Compaction/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 6498e7e..66e58e3 100644 --- 
a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -8,7 +8,7 @@ ________________________________________________________________________________ ![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) ____________________________________________________________________________________ ## Performance Analysis -### Blocksize vs Runtime (SIZE=1<<12) +### Blocksize vs Runtime (SIZE = 4096, 1<<12) ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** From 60588414bedbae91857aed86560869e3e4e774d5 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:43:12 -0700 Subject: [PATCH 26/48] Update README.md --- Project2-Stream-Compaction/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 66e58e3..1f4cee1 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -8,7 +8,8 @@ ________________________________________________________________________________ ![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) ____________________________________________________________________________________ ## Performance Analysis -### Blocksize vs Runtime (SIZE = 4096, 1<<12) +### Runtime vs Blocksize (Array SIZE = 4096, 1<<12) + ![](img/runtimevsblocksize.png) ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? 
Is it different for each implementation?** @@ -26,7 +27,7 @@ The Work Efficient scan is the faster of the GPU scans, but still requires that Apart from the calls to ```cudaMemcpy```, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. This picture illustrates an early termination strategy on the upsweep. - ![](img/earlyexit.png) + ![](img/earlyexits.png) ## Output ```bash From 4761e59015ad42efed13351a9231c5080588420f Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:43:41 -0700 Subject: [PATCH 27/48] Update README.md --- Project2-Stream-Compaction/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 1f4cee1..f6979c9 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -8,7 +8,7 @@ ________________________________________________________________________________ ![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) ____________________________________________________________________________________ ## Performance Analysis -### Runtime vs Blocksize (Array SIZE = 4096, 1<<12) +### Runtime vs Blocksize (ARRAY SIZE = 4096, 1<<12) ![](img/runtimevsblocksize.png) ## Questions From 2dd7e71cbdbc575d4dfdf02ebe994f8d629e2ea0 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:44:28 -0700 Subject: 
[PATCH 28/48] Update README.md --- Project2-Stream-Compaction/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index f6979c9..f775ba7 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -27,7 +27,7 @@ The Work Efficient scan is the faster of the GPU scans, but still requires that Apart from the calls to ```cudaMemcpy```, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. This picture illustrates an early termination strategy on the upsweep. - ![](img/earlyexits.png) + ![](img/earlyexits.PNG) ## Output ```bash From 5a38a2f03b8b713df02a24cb1118496459418924 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:44:42 -0700 Subject: [PATCH 29/48] Update README.md --- Project2-Stream-Compaction/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index f775ba7..00d8534 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -27,6 +27,7 @@ The Work Efficient scan is the faster of the GPU scans, but still requires that Apart from the calls to ```cudaMemcpy```, some threads to unneccesarry work because they don't get terminated early as they will not be required for the next level of computation for the work-efficient scan. I optimized this by sending an offset value into the call to the kernel, so any threads that weren't neccesarry in the future could get terminated early. 
This was also useful because it saves one call to cudaMemcpy. Despite this, my CPU implementation was still much faster. This picture illustrates an early termination strategy on the upsweep. + ![](img/earlyexits.PNG) ## Output From 18a9ed092bd53779fa858a0d7aab2f4d2bc90199 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:46:34 -0700 Subject: [PATCH 30/48] Update README.md --- Project2-Stream-Compaction/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 00d8534..b5557d1 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -10,6 +10,11 @@ ________________________________________________________________________________ ## Performance Analysis ### Runtime vs Blocksize (ARRAY SIZE = 4096, 1<<12) ![](img/runtimevsblocksize.png) +**Chosen Blocksizes** +* Naive : 128 +* Work Efficient: 1024 + +### Runtime vs Array Size () ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** From 01c143ba72af9d124949ff98caefe15169e1496e Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:47:10 -0700 Subject: [PATCH 31/48] Update README.md --- Project2-Stream-Compaction/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index b5557d1..ab4b98d 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -12,9 +12,9 @@ ________________________________________________________________________________ ![](img/runtimevsblocksize.png) **Chosen Blocksizes** * Naive : 128 -* Work Efficient: 1024 +* Work Efficient: 128 -### Runtime vs Array Size () +### Runtime vs Array Size (BLOCKSIZE = 128) ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? 
Is it different for each implementation?** From 99b49c49bd1515744168c3c3eb900dbeeee11ff5 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 19:49:32 -0700 Subject: [PATCH 32/48] added charreg --- Project2-Character-Recognition/img/chareg.PNG | Bin 0 -> 26320 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Project2-Character-Recognition/img/chareg.PNG diff --git a/Project2-Character-Recognition/img/chareg.PNG b/Project2-Character-Recognition/img/chareg.PNG new file mode 100644 index 0000000000000000000000000000000000000000..2da0aaef1fcc4e55549b2bf75bda9dd7408cd0c7 GIT binary patch literal 26320 zcmeFZ2~<;OSXN@52Hp)eBqXdxhA=mXLuvBQ)NVu^sX zsVIFHX+xW+D5!`*APFWw#7LJIVt@olNb+`2PHtvBxd-*{vE?;p=GFtAyB?X}mM zYtHZc=3HENb~?67eWSXXn%XM6<8e=lOHCmfsgMZU5?qRRlM0e z4*ak<;E>}XHMPp*74v7706#AeJMJE-rl!%V`ueUtwCJpwn!lgj;Y08XK7xVJG8?ux zCk9f?TADrjJZ*?IyD{viu+B;nChNX*ecqkB=80Y2H@r`HxErIi#NQos$#h9Ay1aO^ z$x1r|W2ZfK*S&svmuIGdL#0@si@3Asxt(WkNzIz$&wu!S2W}&AJb{^MHkDIL<rsHVgyJ~+%nftzY!d!L+Cl}E&Vnzy7K8Gk_z+lzX9*n%*)hn>lt8Fd2xrRw(ph4J z+;173pcp6ktSiBdg(@`#tq|#jQSCM0LB%IpEMzhU!ce@xLwNHs`tw7Q6vF&j&R^eU zI}@2uq_`z@miOq(;{`&+psRsr|w*&t2~Djq!7+2Nl2;TXvzQ$C!W^KX>^v~6d0Kn>rQtY4XpJB_flp~2?dHx+8GKL z*{ieu!L0R$bbS4oSV-62k*G)*YvEpq_VE~4?oMDG%Qwe~QYHCE*tx8V3_dOGSik8@ zUmK5*DrMU^b@r9RNYoDfxX}CH5cb@BEw?EwVZAV>=@uK#;$W1VO`#blIB`hf+mV`j z0d_t6?f^0#Rag^$v;Fk;hM8GC?p&FDGtP&etNS3?Jwk>N@A>1Pzjn>Raacu?bGoN8 ziqNb3aD^;qSFys-{<*?U*%T7;F7Cwkck*Vbja}^eBZIJfmwmKqlQo&j2Al%9`wF@u zBrrp_1Ty)S5U6A#=Z7U6r30i}v6K@VqL_y0M%$8U9u{qN)J~WJ~d9+D0{4Jj|1GRdmul-^=PHs0ixq7GJ z;QH7B`6O$otRw_Ua>oTx^9pE2FXeY_7=8ot>ld?NIctk%pe3k`5CJk+ktsrhwg=$) zSxtwzkh#s<1)GpcmzZoAxxA+TZq>1@k{1fxc#9w&d4pLw6-Q%nLNVmnmC?^zVjiM4 zvHC)-d;B`kM^O!&eGX4Ms<*R~(Vcsm%099-@OcbVQZUvuQqZ-;tSFHFuH z^w}raZzl#`4rXfo55&e&cnixWvt%KRS6%>t+^k1z&8Z)TTmd-@DNm^C(&Jy+Tm z^RV*%sR;f(QHQt?DS-~uIkx?lEa;0l|MQ7gcD}PpH_0Fo 
zTvk~3sf*0wI8-}StU85g!e)?W<3;XCw8^TM!bW#L{GJ_Y?QWyKsf9J=!=^x%RjtoI#|jyn3h-jt@E{wrTNcQux}LI=KIlMde!$?&9C4b)OCNAq(S7YN0g{g!rOLAiMvfq-=On5$2zLc-IQB&B?k_OL5d0vatfek2M#<$()@Q{7E zXELz@oZbuyVb9%L_KF2^`5^y5wrs8o#h4@!kiu#EYZI_ui&7^6Z2aGK2Yqyeay-kFb$k%EBR@A~7<-bYfMzK^}d9$Lr_Si=1=uDGQwG{N6 zrEGL8#af}p*q=D);=AKLsZ>Z0;g2}WV3}W4F48gmZog1oZ}TdC@6G@=pBx!6f0kym zBm581j?~Opz3QvGTM&PyRULpUOF7aU0ac=lE0$X*N<&e~XSk_O0abh{bYlga&!$2Y zM*1-~h;b3!8&YS}qi5oz{x=5iRTKqIe18C2dbndH>ecjEXH4CB#B1;^eKkei-WnnQ z8tRTY-XgRHJmV31Gfr201YCnp569j0_WTQL@Z z2x5fRtSe`y?9*f(p$mh%SiVMR0O9?f&Ua^k()XlAb5L0BfMD5AVFFF|+AI<-65 z*Ps$b=yp*Tl3LE4SD@p($_T@LZiSq>-}FyalI_xavJS~qFXh>Wk8KXvI*ku!&f8{GD0#P^yrfj$ z5LVmdHa?TX$`Ld~lg(TH8S4oC^5?XzvUeGudf-hB*x}^2&H)OiRjDa^dNIFCBVE&x z1e7=0TNm@(i|T`43mKs#6=B^5NdfNE(P`3$`g?oyqOYBL z5KEVqV3|3o^Lt_iLRxq(+|xpmyV<^1arV<$#b+EM<9-ZhZ$k8XFPGwRTE5uQL^FMJ ziBc?#L&{P)Q`#)tlw9d2qjQwV8lfT!_a>1CJE|;@&R}O^vQHsOX!bKl?`PK>4qb`p z-Ot2M550x;EPaHlK8kYhXj~&cG~yuHF@Z$o+`Hf-KC_x+I+1KyFnP$Ma)Koa&#!Wv zseY^c*qB!|=XIT({(wOUDkXpik}1Y>Gi1i}nY(+^UP6{d`IV)t5p&K3d*sbE)aMKz2K z*_NL*ns)oi*(~BS)G^ZVeO=bdo+O3a6g>nMWL-6t7)`epNnHd5Qoj>~X8c`(Ty!Ci zw|8ook>mL$bpl-#N_mYV6%(r2Ci(H+ZaC)gxtRx*UZ<#|6557fxguO zr!B9zY41qzy6%JQmi6pPMdeF>JE4Djd9lO9Onqo^DHZ zja!bzy|1U-3>OE@Q5CU#)$Y5llxDG8XXJB|U_~2l5Gz=!JTC*`X2)#BAnYk%P4Ij;b5(-eVyD20rDGfrkvnhq4YEMxJ~IXfoUpcX0Sud zkOB!jlzggS3XVym-3Nby6gR!snw{TMntaL2^E$8s6AmJqhh$oxEAqqePMHlEN9U)z z(;)h`IxMf*rVy7B*;>@hqu1zWND8X+KKRzdUhmKG-Ko9Nc6*@c-+W%>y{J#M@3c<- zRqxhnlq37WYfU<2?hnZ*ZU`5u{J@c(mX#M$-%*6Z`6LNj*)9yFa*t6-@}g?(CZr#rlY?u6y$)giDnss_#Pqy_;)iYDt68_LUk6ZLp z=y5*j{&kBWM4O*%@A#Ea)8>1zJ-Fw+B(9a*q1-6M3*J)JgV`GPq18FG+S8RbDmxGY zEk>Ggxucs9S;5st{>qblUdOuw=?p?(sZo#bW3pkp1J#sQc~3A-BnIlD*UG--O1;c|}pnFn9Y3F4*k# z3D|UZXSjaT2dN=({|Q=4`-kC3((q_6C7E|^4LM-#IBY)FVtguA|7l$|Zld>-{*Lgh zVyhhvqq+`r)zb+&$TEqkV8bVSk&Rv`ROum%CmS~^f5p9?+evP3@wt$YiArP^M@PzE z38?**Iee%f!C_Dz^T!-M5#!k^ab%Uqo3G=XQ1Qq&3=+Sv1J5iLAsIqTd8v&F#!{AW 
z=YzC>sgJ!a)p>OmTiPqJYoV}a3hAyDra{);N>jN_?BKN)?{3#w+_jg!z6RFhpw4_Ovr63e&1RZkU5$XG6u&6yJ@$?^ft94aPJYJufi zu#ieIGV%t|<3{}ViKTmY{>;}bBEDF98GB_;^Y?1M{CeaMM3d%>#x*2bONuz_q|dfA zv?1?GCUbrfz7m?zSTTlfH05@Q<smkXzZ?<+*7Yh_J^WN!?@wljt9wlmKAI_FXfWn7@MbMI7EeisNa z&F3fJSSY(n*_&If{=k-1)uoE09?-3ptmH9QS-z6csvgOu3@RaQaZX z1E1zP{2SRw@h9w_1IILL2G(Y%;dF1{=D|bt-QlAf@;<&Y)MF>h8?Za52p8E%H%|B~ zBQ;ooff7GC`{Q{Wg@6-1(UPuYP%;WVJipW zLw%I~h`M;gWb^MjA1Ab~pKeGPz zlTAA|v(r8MEMV1-I6Lln0Ze?d!&D1s|4&Yw;aTtUFQoqIzw`Hh6$z>yt^Sw$-QAYa z|I!_C*RFph`s>&KJJEM9Qq;g?`xkVGklcGP4DV#=>XPqNKf~4#H>y4_CRXf)a|Q;; z?LAQT=84yFs*7s5iQcy2fdL{)Pmk3j!n(bci+aJ<*(`30<}=OCGu=OipKBa1^o_@* zYj&6sStL?0Y`MIl2~r2jD3HFF?UFZb;@1{YiJEbGEiI}JFS5z7qBQ3?D|EsylJDR? z{4O3&L$|UZAC3>&T7UCVMVG#XU>Dp%K-8RpUd%nOX|OQ5+(uofxYEYl_c-eIQtdM6 z?`UGond7=pw0`tJ>W9FeOz}LE{rZ_A9(o%*xno)^PrVQVS*m(oVfoq;&Q$Kq{hr#h zMHC^FFRvVbdT!a%A)au<31Q=$-hoWbo%U?x8YqLf!CvdP^9)GXuDg?0;&i}<*oe^C$0WI))D<+z)yV~>%i}QnCK{_1I z`;f|0sH11UfBf3nhGERsP~GzwU2TiroX2HWo-bnje7YTx2OVSd*w?M3PqJo!H5{m^ zl)Dw{shWnxM-UtL8y{FfFa9NX$ym8}H{P8--R%%0kcAHSHH+wab+RKIv^ z+jN!J)C6T)pCCX4giXe@OB00UITvoDJEw9_(&`wr_PSg@8;p~<=FJqXSVORC>Ih~p zA*FnzI1s{wEOdeH1Hm1NtaGfu=cH;oNSLB&VvqWxp1>Ve)}B4k)8)&Hi!Ykwwe*PI zOTpv8y^7U$v~b&hXA^q9+&;|O1J`H!?z=X&O})@W4Q3mIdGR_iC~&LSj!=!^i@EA_ zBCPx9Ig5&(o@vR|z^58?v3z0uyehn*VmUGV(xjp<>P+0q;;P?tcd1NBFH^F5N%vX{ zoPG)VJ^edZY?^0@!xJ-|l7ZAn4u}k1cc1Hw8ko?_W03^e~KIs@E(pE`pUeslKNdIz9%g{HE>&#FQ>YjSDe2IA46mK=^|om^D8}DaaZ)V z&4ny&vv0$a+oJW?K#O|GC-JFxqhMdr(edhg{1Len9DPniz_GrrM*p^x>Bvd=X7c*wlByS>c z)Te9N>AY{lUsED3@5x-ZI~^a?AZSur(By)5L+t0}e*Tz0-0IgvzL|CV!98p)ak{gbbrr%Qq^`SE6({9Z6|+7U&6{Z!HD<+KbuycBa^}t!iq04B)q0NbthkbnoEK z>n}g=5TKLj~=l?Q}k~bv|W-0ch_TcaE!7 z)K#sT-o~=H7?*y+*;Sx`3>125MtlZ~c`)1-G-Q|6C1Ab~Lna7xPJ;|IK{y{ead>K? 
zD+*LK06zFinV!h&5*CcJ?(eCqm4TQP;McQ8DC&BBT&=vrU8d-^7wZ2gRCY@pxP|U~ z!aj=G2Ss|B02eryDMwf-(kYP3W*ww=lx~qe({qY>i(nq346gc=*1kne?ZB}jr@qO< z&#a1Xq%sQma=S<9HRlKi+NV50Z?SXvL!xGgWxm1_6K;UWIY4^{VMrs0?Lt#3R=*-W zsUz%xuDB&HTG?AiQ2(x$3wo1GMYq$^gr+k`nGr-7U0E5_ZfB|sRHW?Y07 zST&y7j-`=gQq(-;CXHk{Qno@(Z5Oo2d?jel#Li)8Xs4ZE&aN+yvikT3>4CAiI1VgW zmvNvzP;Q>6Ft!n89()4~>5yyklYV&$OOYw?-dKXc$wWy`MZxzZv}{Q^mlQ2a)RXNl z@1O$B+$QKs6^(!0Frg1L%#>joxd~P)ySrr|NdMO#cjbjqd zS&KI?Ye?k-N|i-Q$ft@F-y>&Ul_&Bh4j&`@z6oSs`!TQ0YCN(>ma>zd2Ra4GKHR8f zR-CT27guWt4%c&fe@n}p>PV6IE|0ORkl%Arw$3Jl8Ym{Wl-cqW9s#mL!k+lq=~enf z3}(y0^TOBW5wZX$8AF7qmgjT#F{#;>GSk9d$Qi+xLEq1H!wE#E?pk3--MVqcJ{14? zvl;y&_Yzb0rsle{vbI3%{Ip0vjWabBtYPB{cxrvmL>`km=R%cx=G6CAIh{?*?VnBI zPex2UGwDHiLvmjy39f085A@fIH-WM%T3*}vl*46~X7U+C^x0f29_p?v#`9 zwnp}#YmMOSM~>3b0(Vf*=plAC^Snw<;W&QCx^N(`5{~xTrR)`eHl`%Fd48v2Xg33 zk+3bMokrTMW2$mWGS)6Q=Yw|Yc*+T;>xVoW-L`I!J1Z>y^aGMBrXMn6)Ck0CzM8)H{)8cY}V*rK0WtHb{b;n0@Oa zrwM=^I zyR*+Vrc{kD?>dl-={9#to|}8anmFOqr7j+^ej6<{0*#k$CStvg4J^Ol9E3az3tAzM z0p$?iY?jR{UW!Tn2_U|HAE?vtrjs<_6QN!n5HVkFgH@N&Xen_x`iyDpd;mkatBrKF zQx=R-PIbs)ySds9y2@IyVI&9Xc^+_eDzh?Kn4qnIXAXO~FtlVK^gm8gx#~yMa$9lJ zptxeiDtdbz&3{=|~QJi;)0b0hUfRHZhG436oZ~aPN$_Y@Gy`2iU<1{LsI!4nGeD>R|iXES8 zK53O0-ph5;)xU#LOJ!QsND0tmWq~`2kNM>!2Knm!$XF-k7UU%I3T&o_u*I?sOOVCM1?jg0F(`8`YGh3m^k9WR!LeFObi$9Uel>b6$f8re=g&~c zkM9?78d2A>1h5RlCu~PpK~5@H7HgfJGG1+!s^48 z!GQ<|tgsCL_~3umE}EWnHYQ_AOFJjTRn7t*3-4cGS9wGBQ4ef}811PGi+;hSVjuh% z*%R+sEsuP3swge?-lN>9E^F^WCM-RtzwV!q^qt|qBGKgDD%ZuT$2~4L_zDSDf8#a( z4o!)rE9pZ&x=HsSR5z$u2Ll8Ayg5)lAE@j<7poDaJTdkxvMe}v#!IWDJph(mKr7B= z#Bw7mKmZi2Vlk$AYY6-=ILJz6LvWQ1?Z;fa#{|QrU4cvb#%u1m6lsXf3{G862m;SR zACF%M!w{y;f^f@TT!*ZECZLUfg;A(=B^+@@%yXi&PcLiFVT)f6pb-@yViWl6wYe;) zzCW#Ek<4h=jEpa|71!Tx0(&J-xyk8lt%BtT;mnzZ`3-BRQ>O!|(SU3`PX^^K8V2~y zBH;!XzvotND*LkDFECKg+hNdae_xMvlNCn3i&)G&TNI6WgNd&|{ST0-bm!y&$IGaw zthyB}dFpvCea&)H%w z7^5ShO?6S57ZxBnc*eQh?(r?s7~|@jzJ6x`PYZd~znnv3C-%GP$8nu?N0P~oy&=dF zoT~lBj#`Hz2H7dt^>P1BxWmB)&!Pa#AAMmm=>d{POL)=x5P7P=)8!jBF4dr?l)ebU 
zjHHenK3y(I(dc$9a`G-WXJ@{JsJeSR%=(KheI>>JWGnv;H2+~$Ay)9FWAUOFJKukJ zmi+vt{y`Od^}5N{q_<5~1)o?ncUeox8pa2>ZX)7V>N&>$&a)SEOjVvln~BXBiyx{e zP>~p-qSErYeH6SjgL|?X(=Z$?MQkPG92nE28o7valLO@Z;x|Js)ztdO0{y?=^J9tD z*>`3jsaXg9bkF`y_p!!!LR-;pPB_;c#kRfxK3i=BH=dQ6#I~3R4OwYZg+)ZUOd%7x&EwTF-KzD@>(dvxT zx!T@cki`beop-LDyTiS5PZPnnt=+yPz2S+!$c{{1bj_q~;Gh z$XE2(yU56AlNFA2ocqjjkj^U_H0FG#=#js>>J2J^HyF9ba0{HXJN|mf#;x${mU5#d z6%Cidm$gjn460vp0AtwmLDISUF6Q`3wpM7hkbs0GnC&+MdxXd$xLG`85=-t`bwv3= zKxVzZ{l||VK;jI);Dvs&@(4z9WSxGzKJhJ6RDVza+30u0Y*YggTN4oysR!PqZz zA0_+JR=8qysEU`Tn7j2%%IsRM9>di}ITcmBqx4S}E9gZ7RKs9tI!kfjXS~tdci~=P z3G{AXRmq9hCR=nq{83SUdNS12Bx=|;)s;smrQJGG>@!7tjWq0Z>w!1SgWnN;|r*jkV34MxrFR<{d(2?=0hvp9<-y)w^Eh z+1CykA2Ahv{|74Y+^)`1-o7Ula8lyxLYWl^&i8y+9%%5{k!6t<-ZOeDxe~*e%vU4^ z?ue9xzI2rv36Kc=7DUkZwcBe6P!$z_Y_=7S*btJO#M)S~dcCc8&>Ru0b-c`ZolzaN zYVgD5hrH@d{BxUJxLO6U5WCxT`$9M^=2CKWLK$F+7I;raAJwd#wq9OprTBoZ4@|uj z_eAt?fzzud`0qKWnv`eZH=RxGa-o4sM(*V!mfJyn~KFl-hA zVI>E*71lKOrUTv9;s&N)&z6mIJ_(Yg<`N@U2qp43lCewW(X6-Gnq5409vI!DI1 znyNZ@FZ~4wV1DAYGxD&KM4!A@l7?e7pK;E1k3F8)OKmk0`s0>OXp_C>ry* zV=WZDc7C9ow>Vfe&I{ea9-{mOF7!CdyB$oERA?dM2oukMubx;Gh}(4G zzK@G;$xTFw%&2!rQ*9qq>_$F+Q1x&%CThW(-zFW$ca;~Wp4TZEt`#B-;7x(PvPu^l zRg?FU^BYT$GmXp!?f;Ha1vGBePPHj@E?2fhHG`Q;y*bPImYSVJ8{Lw^4yTG6o5YL1 zz=yc#<#fgk)gu64`L~|?-|@hINL9yQ(SXV(lcQgxENl0F@2Sg#D4m$-GS}1;KtDyD z7YZke3UlLlrgekTKUE;a<*&m4z=Z>K%@G@M?@2-l8N409VHfXt+t$_sj2ieC(pf;1 zI)=t+eKF(DX8~PJrL;*-nYK)Sm1Y+%4x|QsQA6JEc()%;mDG|9zW|-wTaHjOBz!@3 zyI}vfFDF=EBGhn{g;~|j#KZ&`p~W#X0?~9A*m|*Jyz^8p6H-&i#h@`s(1&TXX|w#A zgbIb?!bW~vxN;_B0j)fG^YALqV6_GQOy-J9@_8b7-699Enb%KAj6&H-7Y_>Dl%2&_ z4hk0FE+b{jlM6|6L=WB?L_yY-tIq(zZ^*+;0p# z%~96S)f6LBP02-+wLO4O@{>xRMu^Q8ABr(F*UEMfZ_z1v=q&s`u%YEoRr7bwEs=j| zeyM(SNup*lc$ddiI`8B)%BnN)iqE$Zugd*ux+l#9O^{tzDJ#re1-0+rkPAnJ|%~y6$IE^+4sIstdJ$NMZj#DBnz>=2Q`2Q0WWt1<=F&N}Vx(AW8ym8kR!`vj$>aeXs;Ml^u+PU~bRe z?P{Ox{X_86g6%P{JGb0f5ruKyw5abEIyH?H2ssg&>Az-U>-6g05z6{1#Yh)<^zwMI z%^(5KSld>ZTa(XL8ETl#GWv?y(tACt%|LFzeJBo!aP>KK?Z$y-$M`X1Q(#c83+BFg 
zuMTl9UX+1fkdD1fZN-aBP8W9#L;J2YtvcY<@_L6cvWG%o;9qb5{JaCG)(Om)?nV_w z8q|fb4?8L&?Sat?3%y5XpDj$qWAPTa>F%pF2CwZdOd*U)p}zjWI-pl?8qp-~QI8$6 z4_bC3KQ5!-Wn+}g@D|N_VPfCM0eEGa4hvlgH_2-VTr*Peq||D^e;TQTX|N|wP~a%# z)JEQ625nLoU)ltvCvW`gjoQmDIV?R4DL3&-O*5r889ju)&+6_tXX57H_Ot}Kaf5@L zIptyBkeV|$7x4AarBl)kshKG*}s>wuc;P6-wp8_jh6)e$dlI$U#cun!eSHl zj~`rc8udUjI{-f8M`wAaNFb_42PMzQ zwfF0AUI}tnd5IE#D-LyFa3mPXW?wp?N#+~x(sQgEBDVgpoAWO6295pS;!?d2tDa7; zb_=!*ng~DIJ@~g|O0c0MSx~71jnYayseN&QMU4PCkRL1VS>PjnfdR@WC~#}dCgylx zT>R7hXYf3!VqpuJFe`{sMOXbVu_|hAa}DlZsgV{lkf5mvp!P+9H?pXjbgR!iH6uue zF+Kl%=1Tg+s|I0;Cqeyncu`$QjHZi#Df!C9B+Kd0WrC+bvf^8YXd+sbA*z&DbRG>c znz@9yToTfb7b`fsHX-Vc?3`|-h&yJxtbP16iE-x+YTZ65xS9EFSXdzJUN?aA*u|}{ zTG#pT>&CMU;2rdjAvSo0`P2cp=2QT~H_@k*6i1o0ukGzzC17=j14|5GsEPq-t$*TR zW|z*Ukj@FlR>4bYnd#bt$dTqj4=RDYqM;HzojafypoK1xvIGv|ufqodmy{9MseZo$ zTH7xfP-cxPB%eMDJ}~xHQ;zTQT8^0Fb#1~283=moLhZiYw7}UN@OL)levk4AbXv?{ zfyaMBCGvXdLDl6t-+0(xLf)#ckS+0>hx%KoXRH8P8(*WHT(;=*vs=kJ(;wQUz>=Ll zNY*b&8uept$W3hdkQZfLiDE%N=)5ChjHXA^2=h<=ER-DxJXo@ikYa4EXoUvKf>vUZ z(o9jfWS>> z*Z2pU7kTyR-Q?4rR#x4sBOP_C2yEQ5+tx8i)*2CDV90}&fKRNj=OXq|Dr?&jowFlt zp`DYW+2Q>49pg^g^DNJ+#0yOea9G+!6UQSRQZNSrncxM zxVt%&l6$__c;_-9XK-T$IV9hk&ZxwnP^IDOBj=GfH1<^3Y<-k!YgeCt@Ds!aSL|by zge<0jvPV(o>Z z$tSkGVAdg_LLjrCc0SqJ`#mhA5Ht{ZA4Cty%-)qOxORFCeo=6&!;bq6alX3%<@LZB zCPLU;nH*V1`;dmf+(3GM5X6vVV0VK8xV}RY(zyiY9+8mRM&6GNPLJu?CZh(A{zQ_A z%G@YtXL+MQ_|T6(z>H3@rTtb9tgL zht%OQ=#ojJw+|6DQzEY%f0-?)u2&nf?QDP}#S%|`caOdJOs0PlCp)gAo7*vCW<$vr zc{U}u9gSpjsLZrokYmNlxWVjR5j78knNKb7TIyj|P@OOtEv4+ocM zu6x=a{E%b@>KWSU5#)JN%sACp-g_R|IlJ8AhlXD^p^)bgOPAZ+UWm1x2Pl?UOowXQ z9CPE6qvxunufe;FsX+L3Xh%%U63oCEa>B-}dbt-eZ8I{n?2AR|+t!}3)=74R-M&c# zRnLb#*vxx?9Z7?D*6myVdt#hLa=2NDL%v{Y>LV1k8ee6}ws^B|)uRTfahpZJO>U`w z!uLkK72ep0gl$hxKRQ-aF^-F#S8vBv$TQ^-nH(}Tn|tM}<(jGW?^faF#}vS}!dkx= zSfkt-)$g-hjlAnCk9~63I8d#@jyIv1;r`kbm_k9}CC|Gq(glS39YT-JoLJaRQJ>YN5K zA*3j}r(gejsYL6$F9iurk`;8!iu8R3bygn$2#=G++S1>NexCbA+kJvRDG*PzV>$p@gG1tDiyGh 
zMGlY=b=8d*15m0&#n$rwtaAvq70-See>-hS@39I^cq2YbDT2AO<8m`4kzd0l9%Ea3 z^;J(X_ez20ok~ALV8DMBvb=)-=<0Z6X_naUk@mD;95;c!))I8 ziV>l(``bP_dvm_&qC4TZ@pSBXY!L;r-^6)PKg;CvZG%&;Wmks1EjX>v9=!*@WUjF- zzdBBADOwd{6niAh@mf&Ka6fsJ<$B3dgU%3494e8szQGwF3BM}z!WH%Z5+SX@tc%Uw z_Qbr=k-bEqu&)abt~;LS!z5!f>QTD9D{$q10gMW+glTV2Vi!aPL4j%!pX7CMjx!7JCud1ABLqrAJ2Al_MRD9jkwwfUv71)N9X##?=##B3dTM*fl z{_VXD0hz7P%;_M^*k<9l;GGSCIZp%a>gK|UwWh%Tkfe(UHv+%wOLTL zAgf;F1Ian;M8jwZ2rk<_a^+7={3g7u_KN~kqxkR>niyH2gJ!9U#D0D}2utr=QtKk$ z2s<-hxAN&%P6MPlkV+|&GbrHvmhOD6MrY{}C2OgT(e5Bdbeip%VyqGQ02aC|1~Jqs z;Pg0ymww?jK0sz#+vma`&gBhg%}b>ZZF4mJxaF-w=6)@Dx-}8RuEtk8Blx6Z&MV{F zNH;3DE-j?+D>1$4?EPrluD~^bNxk;ATSY8ifyRsMgEudVBzB>EA2{7)Ldalq0rgS) z*!p&w`iJ5$A1G_vH&W~U=+yRIwOpQz(x_&!ylkudxtnMmJG(?2`Y5N;b2B8^azTpu zykGTVG8XMZ9MS4(smr;|Auy-u{GGcuB2EUAs@5}+MM$n|I}F*7#;irTFT@@d2?g~b z4i-K!UZ;*&ay1R{#^^0tLMDjvwAmTiMC1hc(GpKtv(0OU}|IRkE_Bl7atv^}c3-`A;9 zS9_Mz*Vw}fZ)?5)Uwjp;>mjERw=!z(@tkumWV^Z{#*(w5*Oz2Ty6`fCUto3k)nAL zuKl#RLUH>aA+0slcinled5LW;{|d5TL&?MU!jMxSHs{{<^=op@ z!Rp1~`Cl@@_ke7cl;hCOkHmC|b5yy}&8!|`91y%p>VJj(sghPM76zm{qso`5=IWy7 zLJr{nwjA#p%wYlEen^{=@g>?_@BpY_Obos*_C6TLf>?Ouem?-wvJ!3PXO7ww=Az08 z$tatSIB7PJ*Bd3A)MafNxhXvh26+MnZP*Ij!o&GM1{XV&s13Z5b1YEr*){#0i;v8C z99p>UbYYK6)P=eN{9_L-my_RlACE8OJ`rQOnMHDgpFj%d)nrKFmrgnW*ONP+6{P?F zbNl~exxJdT0oQk1%WUOsz*0T`!tFoj9nOX6rq!mI)*?N8Zh@Q}(*+iYiX$ z9PUg#SXms1G!>4{fIJi*gX6k~VV4U*#nNZBaQy4V+fTO68w`xiOS$M&|um|#$J*6D{$wMxBaq0 z{{(prHZ}tynyWf1ThLS`06+W*Wawa<>RAEFc$L!&RN=Z0S|3^o3T%Cuo!vZFdZ^V7 zUEo7T`i{>o7EKQQj9znZFDSi1tohyJ14FbVQm>=aU5kE7!$=5HFgq8Bl}%l0*Q1^W zOv}Am9?~M0n8xC6IBX^iUGSb$z>;8s*&p&1sv@npleWs%x@$(mjk@}o^16dZmGh05 z7CmyNC>2#Pm*8H7+0$YI2rQbm=u4C>XXwlb(CwqhAU4c6G$$$v6z|(K^ep^9@tap&oj#2&C~k$QgAI%y1V?WU^XvJ6I7g{&;beZ#h!w^ zF_9Sbx-PTsa!VWNI0_j!Uxqo;xsZ!vg2(aX^I#(~O^-Zuc^YRGN-HRr63_1KvW$G>BI4bNaja z3=6?AKq62K19Mp!CF8~d<)x;v3HCyvON)OJEHJ@Ajywpy6xt33+qc8+1h>+Uv5`}X z3^PhqS>{YxMVE4oy9I!=E@D);@50b?_HsJ+rQ7+qh!m^9O-@gHt5UQc*~;>V6A#** z6c6?#SDQZw-%cMb(TET8mc_gqZT 
zr#1>4+z)~)LY_;~CBWoRe-~XRhquY7`@tjD`vwWY?Z@tcm4`zTwso`%lQIRSEjV>9 z$(>+o#zv@=mB$L87|23?#18=cTU8iU_Lnj|p#+MqN1HUZXaK-qEeGoaNy2ceHg$1aMbFmvs5?4@M^c_ug*h1F% zHuh`7v1`#@Y&jLJVVk4TX>gCW3J?lqzQ6qQoCi~--2X3>*8!sa|2}1P|NT-Z;NTGc z*y-_huLb7uxeQ>ut&w@+sX}?=!(q!V`97i+t+FC^Fw(I|k6>ey2`-HD7)_!zL6Zu5}tn<=c%_Xl9%Nl92d! z)uoRW_OYFUp<-v73T}lalWXmY6aW43j!QBBe!Sz}>L9}tnt~&!de4_emxK4hn~X*& zHq5&JeirC|q;UWwDN)J=KU1#n^I`#=8{BDoS!TWu2k(}wRC<7RQz#g zl?Nd<-||PdU|HUknsp^75v84V{?q?VA^p!wFo=NsI`P)Wx=KCyC+sKBc{jjy0n*!$ z1X?R<{^a4iJBZbK`4=B`*9NJoaBjM%fg#AoG4TcmcwrTttJ*G69)!?6?d6f;5>rudd9<=#P`Vk?;)Rk~eo={{ ze=J#%cC}>nW%NZGO?pP0_H4l+dS*cbR>I!6h?^(R91aK>z59!?y?hGi{Ak>-KO$8g zJ>{o3_h-BCyw!sI@yjA&<4@Wp*Nk2q#UuNe|12m_#ys6EY^4RBtXV*e;#$yyf!l|F1-b~m2D&q8fIk>c-5j+Ad z7@C^e3U9w64W&=5<>%161&^ByFn--!SQ$0@rK%!6xIwuHw4mf%`iqiNRWzj2l@eg# z0EFj1X@nnb7Q*xTy{b*|x8eDJyUI2ozrH4H;MAB&QEp~k;K$LxqVINz@&I=j;iOIZ)O=(KG`=zOHJ9 zcd9hKOz2Cw*PqQ0bMO0l(2_E@r5#Nn6gZX+G@i@XM8nL>a@dlQ z18`4r)6!x|uM>@a>QR-q+DcB9JvKUYip*_|Q--D`8L_vl zi}U74{SwaUk*e^{HZ?iR+lwVf9KQ{V>O6pdwtA;n(0gWhNS%acN4;5HG8`1Ka8e?6 za|wQKAHUL;L`T>l6Y@QHBiEu);|Elhrl!Z7AbM2WhB}VjT1EDcSvV!Z(7usl8tD`I zkLq?>h;m|-ya*$ZboI$Ja^sP-M>*0*fM|5{qBhsd&F-8|;{ zom%c4(Vi5adADN#X)XU+>RxWSM<+Kw$K9N%_^7$)U~fA=cc0TpWZm=E5U*R9b1Z00 zwl&9-tecDsOyEjCubdjU?qDK=HE0s_BQTlfOfR%stG;@NHyZyPHDMUCG zAfL^iu>dP1{mRH340Z#1BMw{6UYKf16Oqi!QZ7z;nQk&o zrFkVa?&X3iqKOyd3%;s(NS%LCNkX@J)tED80(xFFah%HgY%HXaZ)dGbsTvYP>v8{0(nj_|mo zyj@B}WJZ+m^hFm@^5Kr_l@@e{uJPuh^Vm;?^+OZNfiMFBPVjY3qH1L0Ry zh+o!_?sv+?qFil`=wJl0=%AIA~?)fHsSmb3(+vU)#l+tz9RV5&AD|pAPuCmgqWKffpqWfF}^9^v_K7G z8yc3;GsBJ2LDIn7gPwLaU)8?8192^G2bwLSoCIBpK_CqV|5`P>y;KLKc&Vm&GGdBg zgTXV>57i{pHuzUeW{VxiHR-v__>y03qL%O-^O`aKKrERoyHnXmY3(%23mEnNxwN|qKMQ)64}KXW<7YTXC|Xw_@y`M$P>N?sKTtOX&;N*Pn<3A1$vBA$dh(Q!m(Z(F(wsakNFfYYQ#E zSW0TEYBMkGo9R;m&RV&AB#gZs9+}SW(X#avGg;n=)~ym*yiO;{m^9K$-H1SmDK$tO^AQ+=BR^dJn92vmSuYO`hHUR1(7*zdMP5ccj92S8E)lyWWo!v9R~SiMhRL-OKiGOT!2@2Q|qDg z77^-=B{;u2T=?`ZkTRfiDOQ3z#^v4m)-fxQE{sOe_Ywrr_sLYbygY#rFd>W?kfj54 
z!~HtiD1#ZvDt$M5*bRTE-M2Yv@LYJOm`Fx-P8k{9)mt(@0F!{e$<9{{+D8Y5duT!aEhOF!J}?@#>AbqR;H<7=|ryLKP9B^=1S+XkB6wGnv!{Js_y z-R_5rB6gbl=ZGH8mzo|69Sb%0k1cSe9$3Z+jet9eIMTnCS!Iq(HFomOf;qzkZqzE{ z!vch=0);+ip>KiMbXVM_7U^i>&mK|fImpl(%N`yUu7c7?L#^i$uKq(~jS>9d7vW9vMLHcE~t7W$529ia^kdvxCi2?3P zGF*PJ8Zu8TacnkEkw6$;W&7G&5FJ<@$iup1ZG%qEd>^V1{;!ICkN_t}R9yeA^8XAU z`Ts)B-{*Av8$u2Q(x_C9YCrenE{CYU?KmXS`W;6%;zs+aFQSSB)X=U8%8wTGlOJL_ zE(^E?@I4BE!zlX*dKxwz<{l?p7smh8(|~p@-4@_0eWN&Bz>7q(%_ME8e+uRBnRgQF zMPGlGHJTcUAkBR3gaYxy4zF2i$bc)^L}*4Ap40pRIYQ=IM%ZEu}>LDTO1hEI-66P`dShwoLr6MSWj3Ztbl-R5rTLpw_&2_L02pf<`4( zgril^{E*z4n)CAGVrURaLd9qb_~*mUUEli^^7JcG@Gy2iZPP{FCdvUDcI)pBZl7hA zxPPm?`wNQsqtB)XO>R`5v8GtaPcBm12)Ny7-ZfWwXV4)KOmXVGb1W-}l8s|e$ zC?B!r%N0X760+yUna_V=FtQ;n3Te1ahZm1x5_r+HS zv?P&71UHtGx_~239gMlV1JBm1nm( z8*Uj>((c=IXezU41+F~&-h~$uqB4{1nAfbW?KbVR_QctqEoS-N%J8ism=mwaeS)?! zHq$LHk9dp1$iCN*3VCn@Gu6sY|7iFSxWxqBw7+#GOAb5q_wBKtRGa${J@3pWEKCHc z+Ep*J{X!#|j~5?nJdKJGb*nlSk8gHn9>82DtqX(GwJsu5kQ*c0ggzdlz;z~%dVpm4 z)=@he@`F?7#S!$H3P39-3Zu%xEUd^Qo_DTZ$~M7n%W@t}CujE3Eu{5iMFyYO5lqIj zA$$jU5VV$@a4Yo7TmxJUV^HzB*rAyt+;oiJ;=aPOA)}ITBB^`-De%1wx#488WbsSe z6N7;Z*A@HSIKoic7A#AK;HT)w>_l`F%4e*G?|7MqL3>AHADy+lcLoe`$9GS`q!dfx znsRxkCBOhiOi31Wf3QcM)3=g~^kii~^EmV-ctQCkYah+GV!Mv}rm?1`xSXyPcDM4X zzTxjTo98*yx(tB2X1J7Ou|^}a)@=Kar!{BSJZ5>lrtr zx&tE?LAw^HsOzY=3gw1^Bc6~|=ygylXtu(Dc;JE|!~<54FtzSPcjj7S_J%;*Tbz<@ z3n!$?&xQen*&v4g_#{F7zsmfQJQh(OJ@6n5QtSMNsA=#)lTQ+A-IF8qhn<(4`yeLJ zT&zOwFI&q^8BSRnx0HkHk3(E3{i*rOil#1X1gnyl<>&6u%Tpm6kihFynWpsTzWl9baea)%p#r|*q2s}&oC9y;=@rY#dLiw_UIDbu zX^le^G~G>zoLYWt_OJ4sG`})JLkU73S{Z*5l5#T3_u>?UtCuu> zC8U!ULTMKITcw>U(<&!t?2i9C8D9d3=o_(jgmNc*=AFZOxh6-uSN+)GXHUT~7eh>@ zg@AZlVvzdi;ZT6Xqa}{%|66vUoK%T3Lmq38mW`5Zjzi63L3i&P|L3eSMT)DFiER8lhI`Z^{sS19 zn=S)~@>~pMjn~jnMj{NSN!F5gMEE3r)ez8>?1z&*Wb_dXqHGfFoH3{K=u+D+PA}vt z7ILJai8}rN6#};lV2a%*U2-yVCOj3IX$TzE_UBtccDrHUCLllL#SL9rznL^n(&rA}xCs$w{MA`_D?T&17h6j_8 z$?(U$H)ZUF>2N=y$hq8(3S8TJY!33~!cdL=ngY04MXW)BYn44sk(d^Wc$=Sf|5{pC 
zz)xAc()E?;xp*7IeB967KpHexT2TTYjBG`G@2!X(?+RF6v2Qwa=DYK6Urua%ce@?B zM=#r3Ea{ovYw(eK{8hzD6bt#9`}u2+xC-zL?t(#^YcLKhTjjb(Jdd^~Rq~RH_eHQ{e8zp3OgcF->rC#D=WNw?yiEOl#dVixldPwA4WcC!QwVPr+hk)>$kEb z#m`1?WO01WeKN{<%~ifUs7`-22PX%nKl=LQJw}&yP;Nr8hx&R}yCZbMB5>_At>#c9^W5A)kVh63mf+mAZMVmFf&OhV^tbi$_AtHc{`}&71rJz| zoshb@5w|VbymJ5iY=4}wSz+o3bb+yp2Zg(R#?JJ50IQPi7M*YnP@B3|Z*_)0%vM~j ziz_?FYUSj(FusmDc$eK1 z3UW^H)TC*NI~n*@+Bn8ouw)F{RMRYM!bysKIpiVKb&YPNorp z?E-6!n$QIPMA7V1=>SDKohp3iBkY6nzzM3=mBwplx04Z!k>IC!!)M^*vG2rZ3)Ha! z#NJw~OEhsC&9e5=ss4I*-JCePx5Y66sjcwTObM9xprKv;t8%*S9eXm20EwW&YVj;~ zcD7jA!Q`r465NBVj7p|R-d}66a=329Q*+3X@bE1mF>LmEg0E%-Gl?IqD;rBO8?NQ& z2&HC#3wzxu**RfM%Wiop;S4-5FWP@8-=T_81hPiUoXU zRs0gEfTZ~*l5aiBQeLv2tyT*2o||pW_%!MI)9~Ah)$7Bivw2iZqJa4$_j*2Na#GmC z5LxP`UdS++C-RA-l?Zy+=IUVWBroU~t;k1A!3ZNeDu?oc3%Uh-69Nd4wP3Gqza|f< zO&g->-aY}wN^nntwLfS&g>#+Iqt$6IHyGQKdEuj`_b#!~fTm8^r`5%H@jpBK%j4tv PzPi_I|E_9}A1?kUH@t0> literal 0 HcmV?d00001 From ea0650b44a3febb307c6fbcdc3b0c2fc0efa6358 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:53:59 -0700 Subject: [PATCH 33/48] Update README.md --- Project2-Character-Recognition/README.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 4503fac..f7950ee 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -1,14 +1,17 @@ CUDA Character Recognition ====================== - **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. 
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +Dhruv Karthik: [LinkedIn](https://www.linkedin.com/in/dhruv_karthik/) -### (TODO: Your README) +Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GB, GTX 2070 - Compute Capability 7.5 +____________________________________________________________________________________ +![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) +____________________________________________________________________________________ +## Outcome +### XOR Convergence +![](img/charreg.PNG) -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +## Implementation Details +### Variable Neural Network Builder & Batched Updates From 2a5d0cafe7f1c524ad8bca8a5c64e2a304dd7149 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:54:32 -0700 Subject: [PATCH 34/48] Update README.md --- Project2-Character-Recognition/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index f7950ee..6b7421b 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -10,7 +10,7 @@ ________________________________________________________________________________ ____________________________________________________________________________________ ## Outcome ### XOR Convergence -![](img/charreg.PNG) +![](img/chareg.PNG) ## Implementation Details ### Variable Neural Network Builder & Batched Updates From e4dda37802d4520beb694d466664cf178986624b Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 19:56:52 -0700 Subject: [PATCH 35/48] Update README.md --- 
Project2-Character-Recognition/README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 6b7421b..b370f9c 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -12,6 +12,15 @@ ________________________________________________________________________________ ### XOR Convergence ![](img/chareg.PNG) -## Implementation Details -### Variable Neural Network Builder & Batched Updates +## Additional Implementation Features +### Variable MLP Builder & Batched Updates +Define any MLP as follows: +```C++ + //Network Structure + int numSamples = 1; + int inputDim = 2; + int numLayers = 1; + int hiddenDim[1] = {5}; + int outputDim = 2; +``` From b8d822226d88225b18c72504402597242b3773f9 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:10:33 -0700 Subject: [PATCH 36/48] Update README.md --- Project2-Character-Recognition/README.md | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index b370f9c..96f21ee 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -15,12 +15,20 @@ ________________________________________________________________________________ ## Additional Implementation Features ### Variable MLP Builder & Batched Updates -Define any MLP as follows: +Define any MLP very easily as follows: ```C++ //Network Structure - int numSamples = 1; - int inputDim = 2; - int numLayers = 1; - int hiddenDim[1] = {5}; - int outputDim = 2; +int numSamples = 1; +int inputDim = 2; +int numLayers = 1; +int hiddenDim[1] = {5}; +int outputDim = 2; ``` +Notice ```numSamples```. 
This allows you to set the batchSize of the Neural Network to perform Batched Gradient Descent, as opposed to stochastic gradient descent which is the base implementation. + +## Tragic Historical Significance of the XOR Problem +Neural Networks are not new.In 1958, [Frank Rosenblatt](https://en.wikipedia.org/wiki/Frank_Rosenblatt) proposed a hypothetical model of a brain's nervous system and coined it the *perceptron*. Essentially, this model fit a line to a dataset. However, as seen below, you can't fit a line to an XOR function. + +![](img/goodperceptron.PNG) + +The perceptron got a ton of hype in the 60's, but two authors published a [book](https://mitpress.mit.edu/books/perceptrons) on emphasizing why perceptron's are terrible, because they can't fit the XOR function. This single handedly resulted in the first of three AI Winters. From 8a8b6a7ebfaf193b18371cee0383eb217c079168 Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 20:10:51 -0700 Subject: [PATCH 37/48] done readmes --- .../img/goodperceptron.png | Bin 0 -> 17135 bytes Project2-Stream-Compaction/img/perceptron.png | Bin 0 -> 75183 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 Project2-Stream-Compaction/img/goodperceptron.png create mode 100644 Project2-Stream-Compaction/img/perceptron.png diff --git a/Project2-Stream-Compaction/img/goodperceptron.png b/Project2-Stream-Compaction/img/goodperceptron.png new file mode 100644 index 0000000000000000000000000000000000000000..1f7cec6553cf01096100fa238d3d6510cbc106d4 GIT binary patch literal 17135 zcmc(Hc{J2*-1oGLLMUZT$r7RLdlA`%EZLKN-^Z?!G=wB1TS!7k_H_u^Lb4l6)`>AD z`)=Ov)bl*&dCq&@_pkSyw{!0M&dtnRziat^KidU$SL4oUat3k)0&!Yd>9#fkap(a4 zzD#x;{(V)W|2F*RsE3^LJu>(cKxX+IzCP)ybl(Gkpr9rFa|rP^`8)z~5utqh);*tO z{MazkxIb}g-=#BGSy$UGRprYyjGPGHk#FDBlxbff@N)_8Pe#NwsGGg)n4#J-sY4P@ zSE?1pnQF<6D0A}c|2+Deo%xBfz;W8km@oNl1H29eI;+O(rq4ZI^0vi0Dho@emDX^* ze`R02*XO%2ygTfVbFVa}WP3_2_xf019pgp#%k#0zC^Is)r(Bm`ZH2;@H!l5u`qJV% 
z^2k%_ELAN!#XENn*}Ay2;JvZsx8%qwr|IPulY4r!{QUfW&(HTrAmpC!y=vlrI&?ie z;|B(#>f*v5o-tD`E=R_`OaTvPr7*Wu&K09Xed@RtU1(O;6^o+t2)(?xg$4UjXVJL0 zI0N2uh{KN&4<9~!-)m>@;Be?oN=mc2z>k4}XBfO<;b3O({K^KBCX4f2L{UY>^9-)1 za%?oHqlj}6$Fh4-82q>CW%E+Q$NW=5aVnF`K8QDIX`D1Dgn2n8CoiuN@BOfNSRhdi zG2<@&WtcZSLqJAG<03Vp-txwSa?Ay0<|{NP_)+BUHR>x|bmVThfooqU1td}^!y0cr ze0Z5M?8qZmv6_WljqKQ+9X}?@6R>0oTIU%Vdll2x{G^YE94g1CJ2>#bgCO2(b6<)f zTlPu5cU3`%;R+pjOvmhJ4E~;;Uct^D9i#G|5#6cdmEL%U+S*#<3fIRuIoyc1NlD_~ zi|1w1qEE2++*DnGX{cHL#t6??w>Qfl+iAVsn|APY%o!V}Z`y!eeISx_{rsuzJqHv` zaY;!NXIg+9O!#1-nazU-haSCs%TQHS^=CVPNj4~XZq6FWin zF`i0h>rqkBb%cb3#MVjz`g_dfJAeFYD~$ONFgvK*PZ}C-btQ{8#_{MQTq})zYE9`B z9ymKk!5^s_ij2O|DzAw0aleT4bhcB+kL+#|u-n+Q?BZgb-2;pZy7SK;0ghNySC>ZJ zMrk3;Z`-Wkse7PGcqxM8?L(5VtR#QB+if1;0W~CLk`} zwvHQA&hGW}^mP6GgDr|xRzp=)b$mQxmkkvh+b6jXpDJ^(^{~JUU1#5=ozkJkPBSug z*UIWjc!ulz&nxh>J@cM#I<0qhckP^=Ias2xxWdaS_ju1W5;i^77j$KIeqTO*{CJ5L z_tqti;$dE9X6C;B{s#i*OA4c3ym+a^?kbVO%g09(-BOySPslaqqvuPAFy;%JAPmaT z9yxO4dsmlz$`g2!bgpm_@{3L0u%l6^ld$N~GCN#&JRZTz%j>#4rQkeXljC)~4-4=5 zr;Rhsb#+cXK0e-6j0qkX25^@yw5m!%JBJQl!cKGY96kL77M8iW&ZJXRRDwc6u-w_X zx#5l?N!jS_jh9Vm<%N`k7S`bJX~aDnH4fTrjg+bS$43ySPM!KQVB~ULTKe?O8;S3$ ztCNiR?ntCq`}w_zIh0*Y7$x%_1i{6s3XYO5X&NnI-^WWeNqhy{Mwn@t> zD~pHC3s2bw8-*;akx9zuWm%cXjl@%zFJIn+ePwBDOBUVI*hnQy+F=I9#xtVm?bEQi zM?4=>Z{`&h#pq;fXzA)IXlhak3JM+#xs~t1aO!wsQc|O>&`K76Ac`~9=Lwv+XJKL6 z+ugxkt*vBEPEJzmeL9O{)d?`^LCXfq~Mng@=dl^O@GP z!Yk%I843qGE=Nm!g@5F9b#+rGBK>L$IW>NajO3nGIa!nf=Ov7a^+RcCTw&09o)}Y< zu3Iko@et!$zqpYR!~AzA-@&&M%z_fLQ~koF{WpHOSAP8X(cInLUH7pm7u^)>zqQRx zTFS7T+}x?wn9EG!?zeXnd$TpC*Zt<_=Z7QFRa}?*`}>(By9z6_ewt>gAjMw@XVelp8<%NvdP}^%w3nRbfrbT3@ z1-*qb`9krb7QH2D0t|S+_4U!gmWR>(7#pk2ZNYk1i@VQ|g=hRZ*d+)`OV`>v_v5{% zSGUF7rjBZ4(-sdWN28x{xxu2qJHdG=cOLtxn0s%*Z^u(YU5tr?i859ryOfaU=C}4+ z>ysXjVrEQuIQgjaX6_r^>^i7^P;(YB<}om}xl7&l+*0kK4Bn4! 
zxm@I=-Kk$_dhA!s9hS*ZdQ!r_};yHpSHzg+n)B$ zH%+>l3$A{zznG zBz!D2+0hF3S#qcXBa!;*>YXORXulNEL^V$&vUgzM!`b}!!#P(m91A~QHkcbA$&|8t zA3(8tqq)c>%oe=2+yW;yba~k=qv!WqY|gUnjYPfXOpWYDa{+o45#9(K$N(#T)Z%6 z3mYjYD4_pLE&Bjsfu0O)8-~8U?VC^SD)?pi(}pkC#+3jb<*A7bbAeW9mQbC{N@LpE zbAvN`KCV;iA5SzCH~HTGF5`2Kn-4Ud)PNP`zxBmu>f{Ip(`H?4dE)}?l?L;dusbz6MduAD!0tPK=NLLk8(&h}A*@#_nlXrgprU?4m!C5zPa-uY2&g7$O#l&cujutL4sPfMQ@ z)Hq<=4!QB=x#{VT3xoG@@W}_8!NdfI!GbfVj+YWN)ztpr%!qG`ZMtuWi*uvQ9P>g$ zkIC%)k%tN7ulaKr_V-8-Ej^#Hxwm)xhYuW3QGIvjvx^LP_vST&4QaDiS62i6G@MA5 z_K%0zt}!L#4s!QPoK@aSdaoCg>;EToCpJ=9UDy+4hHx;a(4xhl_O zn>Dl(xGqF0PA!XxI*(oiOj&BE%n@sh4l+q9Ff=x{1h_oL%q|+PvrgGM=}Lk(bPALF z!P^wEv9SR#mJz}Nfq;$GlzCuo{^%}ONoKH(mQ3GN-zv77RNw(BpP*uC!meHy77o?O zo+iB#PK>9Qm)qW!8v`Tb{Nm!w{^o>@we_B$N<9bH_qv$MmXX{{8e?W(#9ES-wQ5-DEj?_U!pn$DOvuEsB0E`;^$_#OjjTUfI~uCA^QwcBI1{cO-a&dj0LVBs!$5I38ZU^+F^77O1huBcGD zk;o1#1E5>X7wvy|G01?jF3usdpP;X=4;8y4B}MPRm@gqSH&^9V-H;Z~v=AM6{?!V> z^QWLqI8m(vyj)mV_-w#yz#A|5EoaAHF!2ZyYn z>1CVZ`%bcMnLR$;ElfUjY@9*!wQR9eWk-iEHIlaw)3hMDl(3E$ziR73nS&`R* zvK92~4U9|XAP%ZzHgKK^by_*P=QW%MwnDkgls|iV*iZno-u2GI)W8}8c!ecUVwV$Q zXa;g9#4!7ki>=*woR;|WCk@mNME$IL+q{XKV+g>~)WD7V89iLbLmorDfMy6?0p3FS zK<;%Be1MNs*Z1!)HVnwzh;VR$+bOX>X|`bdTlfjg{c+iK zK#Q?$@1@VljmqZrs{sD=UV>fOLfE8@Cb1f^6b(4#ZemQn&N@Ih6wNjjj^ARBRfduMWzlT?TNzx(4jRq@Jg+#=RS5Ryg>u$e^SpG&d&) z1v~?2_{Y4w%Pi5cO6)&y>qGdBC(hMFHTZCcQtEs0kK^O7aRvdWXJUdX{q`=`)0XM< z(o!$Z5ss!PE$j@xNmWy&2NqUXYID?Mb8oj#(t6!3T{~~zqml{uAwZ11J*h;s;|U$1 zjgzD!c&t35V)34F2hES4UuK(RWn{V%1#R8{R?Ewa$h;H%s?!>o;Uu^C2Ab5tMmgGT zWAXln4kK5XmVWr?RpoET$j4W+aYRwFKw|Ma-9Eq1i zyxLN2F`*5XV!5M5hwt8A#co3rR1rA;zOF7+jU)5e9hoMJ>(H_u-AgIB%n05<^ST7BMgbq7N;=HFsj)X~=-wDK@CR$BUdE&9>X77k3U5RZIkdpe& z=u*=FkCK>{MtZ7G&v$iSAa4WgC@A0qU4~G%%Xcm!G~@);x$n70<0P)6pQFP#iYT)$ z1{hBeQtFI@$$R6&cKRc4!PdB*sP|L`%89qc0x6nD>d0Ji*>7I zZrpd@=T<|i??Jf%cJ$-N4+N}ou}BFW*-JjPiGbBkr|q$Ym%nQ%Bgcatk9Fo8DIH;rk8Ytn9-^kM9;3zqf=ypvA82T+ zdiXSrY@ik+)MXvBS9g8%@Rr7TXrk#utpZtOLUi}Obm6ond}i@*jjn%gOLY5=wp-!K 
z{nu47aY%!Ou~BDIk20z4v1O-0kt7q?yWttXfcyRaZ4hhh9Wm?(Ep{5fF+(IweT}9p zT`oS#A?NpmIL{qj_sZn5-Z>5Q#OCX*-g%*UPd|PBWRUwLByvw%0liUY#BAYFEuU&(q&|rR-aUZ7IM0gJ=35uS-zyB~xSk20j1S+0szu99@ z`+LU6HFS0R3yjIu{PuI1_xqyifm*;ia)rxzd94?)AH?Y{*e`ZpEL6_2 z_&yNo`n5Hoi*5ncV^I#32ab_dBMpR2hivPw?k5JF%XL5mF)BPFxMZGZpW zsJbP*?m_~;+41=7wI>Z1iws`eyBgYTF$rk2M{ik)$psxJ>W!?%CE&UTfCT-S3}gL? z!F2ri@lu32WBl_@fK00I+yzh+y=T@VIF=h%6mH?0KlIvxN(<=5QzAcwr(o%NE#@*! zsGPLwt(0fOzs2t+cXX=^w=+Bra7l3}HMFy{d+^}Ftvmu8T4)|liEqZ7CyE3moTI1z zi$=FumQX&~sR#vz6(+P;V z#6m5k;U=tk)I@V!)oxCJvx^IMZN8Tzr{vWwxmVgcIDoE(Ev1Dr_6=FL%8Ykor|HQp z*<2c{#-q$q28N1ls3!K8kXm%dV%tc4iv*bAxI$ru>GA<4Pm*eK?va;cOkDk`=!*08 z);iX09PI7weSq4|FD-$7;1C+k5qlEq69)gGpuoAs6m3gGY|ovJL$UtKjF$Q8Tn^yY zX9~qxqVxXh)vF~jkF>Lw3V#d@RV;)+IVqhKPU~)gf>gV(`xDny80Tj4T(7L#R*2Nr zK_qLl)K=ZW1Y!$&=RXz{e4FP!DFE0?8?6NZ&+y!*+-LsAE=xmq!ye@OE+5L@L5qE#P zjo12%9Wfe|z>OOjPO&L!9LSk2Gieorz)z{yfBgI@Cz6zvm31tyziQ6c!rw`_V0K4` zX^DF|&%>T{ny~|V$HRMUo6H41U1&G+O~0y-+`ZX$!g+V&651rk3*k)M1?d;+{>iY$ zk)qM{^>vIXK6n?Gn@8Da=~6sgl2wzn7Bo?q5{}D%iPN0KtjtWdd+mW-4@EGcQN8by z4EMsBO_gis#wfRqX)2O{3v7i2Z=GNXT+@P51c>u!NCR{5n4^fQ6bo1k)TrCfn=O3D z#6G=r(l@4;UH(B?-sw+$nG6y zT`#H-tRCx1mG&>Wg8VE+P94z1D@V;ajy!A2A`KLPEAWeio(pDBSJ^-56*w zBkog$1qE|k%hOCU0dE{dBKqyKK76p|9Pz%hVB7;Dm+yMN{-O3S5^K(Vxo+!41|Rqm zF|Y-REBwU6qnKTt0_1HFU26V(r5KrRZdT|@lclOh(Q?|_*$od2>TI4B3)m9)Y^G0rBDtL1XValwO@H`=~_Jq!g} zK;R~bNFc@Es4+U+SHYJ(#OJj=#-c{1MFS)-KR+K}Fi4bd-oCwbE@HO&zT9&!r78L; znK+yjQ3deK-n8t&WwY&eeSmh~zkj!7v_nl@KD(c#{%j|7==iJ*(}JrQ>CoGk(olvy zo=sVUjtSJc-u+5l<7BIiU??@borRI(onxNde|)6CIx+T_5*BW*Ce%^Xdvr@@pFJ8r zb$4mmI2Q~pH&#m}&r(btA*A6VxGtP28mqrTsI<+@qMt%~s3jfNvZ1Q#`ehbKJk1d^o7JLU#Wn0Q4E|u8|j|pq>6b+DCl$@Nd!CKY3 zmj+*?Hg|M9g(w`1=$LJX`QYO5Roz>l{sqeevT0*E~-gM8QBwsSb|BNkfg3|_2-^$bTHHco|gr#VJ4p{Oq2uRM+ zBrXPbx6wDy%cqxZiLCz&v3bw8iHR3LsSRtkXfPL;DTOa_SCW>bR$BG`_TT`uGu%q|?yO zLALljdI1_TxKW5!HdeXUT;-Tf7_|}?-B(*7*ceu+XoT{eGNIW(`qL53a_Z`l8(RbU zMm4Kll8At5#t8rb%D+%gIFX7*z<|E0^4H!LI8P#4ZI(0AyaGIgtyurdD 
ze>8i6s{S}~=}4<}@kl_By83BNspX^o8;g^B+pBvAn`kTy%GWua8p$D_vH8uS=&qTW zCfFX<#fkr+;=D*rWdYB2b*$zKC!7Lq0&n3(+n*wP@a9m;&3&0a zo0?Gvo?~kYBvIfw%R$b z%ep4!JuQk1Rz|hi;@ez_`@Qp?BPC|?@*xAm(SQKcWnS$e)0({Ou2wV^25sSRxY9z{ zc%XT;$MGOY)e9(1B{i9roTmgcAd{^C8K{`v)hU2HKz{Gtdpe|eY}Za3d(hVc#v93& zx}4B435G#VPR!|`+qYiod@s`k4Y^x-GdX^1kWoEm(+Alm6+F*9)Ku8DpZ~gJ7BspS z(a}0OI`Xjm-qq5Y*XX7wVDGW~D_AiqSE>FAUXF!CuYs#)3qAi#Ir?sN$X0Wlb0 z!d5_CfN0VCHZD~=p0`Z|@9O340FW+ab{7SmR3`w|@SZ#JBmB`*>Sp!AK?fdWU-#D5 zmKgY_wnF3=lRC)UqQf~<->Pwdyin2tgctTd$cs?A5WvYnAzc|()B5m!#n%Q_4s1MV zzf5ATh!l;m#>tnGKTR&e>gU_W21IMa<}U#)5N zv_5gdq6nHER3{q{W#QRA={c|%aks}AUqn6w88AOzeV1r1pd^t39@i>p>peeyT+H?* zFH%-O6P#34DJFIX%6&G5ZNP&Cq)Mi3zhUVTvGP-I>zi?3J{(WUGItz1i#G@l8C}DQtAZX?4o!e>~>sb;we`*Nt zO%j;_!}Z|Sp@F$qj^;Mju-_j`!bslDD4`3S(#P&aDrSlly4*;t@WPz~NRI%+R&|QL zT8{5kwY&+|Okcl6o=W;CZ}fq}r86K9Jer-Y1v78O#Xb7Hpix!0%yJ$q8b_?Me@#V2 z>mAm>qxB@e6(GgT;fvni+b8d$ipKyxgE#h*MN?DL1MFTf`8B22>7Xh?18eb_ID(r0 zsjH0&wsM7yHtJknhDBbauD36>Zw#hsm7|l5X#TSBd_fG4#u{3gW-u$yk``T66b+~tRt?B6N9-gNM=-^~w*9L{1-VVkOc{v1P=pvq2<(VI zi^bjc#~vlBJyT*o%u6_Q5f$uV8HE;#bDSE4@^x?k(gM5>I}O|UQ$TQlZ+<8)j)j>I z5Y>H6R(W$St*Sk{tV|DsnV5A4=iveq6Lf$gmkAjumS{(O?J*imR+c3#QN_L)-8gwM zsq==I*mKju1m=ULvR4rgCqY62j|9=t;zdPCxzZwg0PMLjg1=75$|}pdJ$vX))#WU% zYgRE^lM#}SL0<+V3Mw_o1aRyjc2a8^4FVx=TUpEg@;6|6X7)gA=P8)~KyzUNp`B31z zCD|^YWwXls)#@go5hV|Ty@L25(E-ZWuU{8o&yolhq&j9Qvj7qu^PB|njEYI@(cgSe z@91bU!TRhm*oivcC{{iFJ)?X*FjB$EhjR^o0W;CHE{O{|g1ybe^m1z@bH-wv)s$J= z-|iRPJ8$XiOqtoEB*f5(EWRBBo#A%QE7IoLKuy>RfsO_7hs@$)T7&Ulm%)?(@d~R{ ziiUKR_T3GXSwFZV^S#-nEr3DpavkNRKc3h*7a*cZC&2xLCQC!HSS5n}pgX7kAlFsw z#RP$qi4p=ykmP{FIt?{=NJXplJ(m8p&(h~H!r>K`2A zD@JR2_wTnIp^|#HkDegd1O=sn9t&D0>??Tsj&Z0#GsuMJn43c zj@xNC4Ys|g2`C$ohynGj5*(mO*EbLa0P~|;mRM7^t)8RG5es{iq()fV*Z|P`1(n_k zDEYSY+o7kxv~DC?y1Kp;wCVaWIC$a_M2DcU=EZxCQ&ipz4;VsKn2I;?x1mc$g_rMS zYRzG>^WdTZDYk$Y-Mn~{orV-%0`(n^BMs_x^Y*~;kXwx7X@B9igVEl(se$|++t(n( zB7kzY2iePM(Lp|^!gca6EiElrv9H=J&tJGO`8AXR0pe^%iIOorXQ?$W)FGSJMMeCO 
z2`v#Ogf-yLA#8UVcB(l7!d~tRecVu(U250QLH`3k4TfHR4XGBYAw4b#DyN9ZvtdWR z-}nIfbLX}o33a@lR#WPhk&#iV(QC8m>FH!~k7qESluTkIZU&1B<5&SF!vjB3daY%- z-8YE)Db4n0TG9+ls^?#bP$Y>ujv|h zDndrV9Ci-05CF4!dBQ}L>_2r6S`mNk8V$H$O}O=p-g%$x*#y!C$d=vb21P_c>n`1~ zgboD-h2|&`sR{rpl`;RVPAs#=R8_* z3$g`})qyMr92ex8m#EsFy^Ci495e{2KlMlQ4ub?KC@ZT87#@OD5b1%Tz>%zewdKO< zH`0kH{p;@GiP*WhlDA>5Vrqar0h#$0ZsB{r1yTy8JEv0+@V& znpb6ViTY0!7UF@R!kP@zIb%frfKmh*F1d7Sa1N`3@C^2GRZuJG$nC0s1BZhm4tAbz zWv@u08XOSd4d8}A0@Kb36KPKbFb#rucm^ayO9z+mc(5Sx%lNW~DK6J@K7M=$9uACH zm&gLk3f?_5#U;Cv1#m$ZnDR5Y^5%3+_~;==2T&C-d&h=z&RtnTs5R%*j%oN#-&4Zo zE-BusnY~;fzCtbyU z%zGN1`GsdqPN{mt4^?jR>oHOt^v1s}tA6*^00E%@pVdxL{ryqzd^7%Yf6W1P0KDb` zwhA9L{+b?86>~m)8vRMAPOHA`6;)PVuD{C+KiC*XwVFZ*I03RS6cbE-I>q{`sco5L zLH|T$e}=A?>|@x^LE9Z>?%+@CvTZe~(% zGPa8Tix16vj_b5|Nlv*~@g^+fzS-4MO53z3GI0y!C#>tO-Vv|Z-$~ZU29*x%Nhfli z*tSn^2S!iC zIqrYz^yxDZ&A)&D{sj>(bLeuw!bwT6ODlNy7RiG+Vl5+N4?&k^i9P|My>z}&*k2}; zZ$6k=ShISfL|q89Z3viv8Uaz~lJsP}1!(8a&L4`3+;lBq%|`yzL64lzPBUHKcdvHP zj?jy3o5|hhfJrDWE+#dB=oX}tcrFIdSN)(npKGn#Kb`A!g&s0()kkWi{csWZzFH}&9b%d5_Qq*Ls=_JGsOARYi3>bn2jZmE%Ykd?$K&aZkxIVlLN1RF8 z?=|FPNbLYD(~@hR$S0z4k^_ z)mxZ$y9~P8rfJF5fowQ_iE3c+Lnr;~iOK`z105v&KNVsOJ_;2)OYuyO?f8ikfW-T* zZUW(ixYWI?Sz~S@qM|OlV~*GJK7AtPVu7Q9&rfzdWQ0b!^t?^AtT7VzDQ<- z;kO?X6KMO=BfKq0K-8caki4?t;hgpF1VtKjYeGu6!|(^B&8JT(I;#T(8vs3?#(gRKiF z4idiJVn=v2sTOBc$cm&cdCjz*Aw^e7IbY~2C0XWwniaW#X(vnhz9JOHYndFce8rmsF?t1(9Mf@)lchg~{@KSnl)dhjX~d^$U!F_!WcX z2gDBnJ=DtGody~zxTcUZx7rUvOk&1RsPP05_+u>!p?+wx<&SuE4Kk{p+$khhJw%!9Z|bln@Bl}L%rYE9 zQXK2x;6Qhm0a9zgXGjUh{QO7&ki6$Q;!Wi@MChxXIKiH~i*8!$0`p=HLd~;(CL8PB zoee5ir!5%&O1vq=$xGaP8W)>VvAJRD(K< z-?X+&IFb{l@K2wP=5|RWnzKJOsMlf^027Q@~eF*C_X>kj`${)$ zSYrbaeRyVT(1|f9Uby?Px%+rl8o|CyPY>8wn;lTG(o{Ds)a1VmSq`&VZ&761;evqB zj0!X_nI!I~)EQ0iGlJ_<)&@(b;Ha-o5cenk=@9&fH{HV%she-kZK>*z6hKbrIL2C2 zbnYwJzn~wO0^ka0oO*e>B3s<%JPA_DEA(0BRu9sWzv%m|q^kGTP6Oa6fUGyYsn6rP zT(|iWdgFJTymt*^SQ%5IIBpkd-zi%1+z8{+@C9Whl?a^(M9jmpwJ+EiBJGWKx;i`0 zP==ihl{*%*1xN=>FtAYn1~S{mPpAE34v_7DvjoP>m&i#uPPZCyiIk{+q0b;RD<~3Q 
zzdmX5dOV_HjFl!a1h^sqTKmy=5OH@DF)C}w;+xkYehd?vsjf0hc|vL^o}S=1!&ZeT zz-*~W=uJ3QlwrUs;DW_tK;_1K`uCCM6md<nO!{SC-vGJC!cst1vA*ii%`NH|c3;5^&~0STaCrLlAWQoJ19WeDy-cm(h5 zVAR_FQEC3i?8>N@5#$CT4QRmotYx|ZJWH==cdN&5c39}zciH6MJ6ILphbshtT!Gw8 zEmw%h>5vhvEv})at93M~PKU~o=!+W1l>&-Uy(tL-N*3Zr5zUWg10!aSa6xKoF}a2_ z$cu|HK1@C9k|A6=8WM+PfE?G;A?YsyI)85=`4%pq^_P8^&=b)aT(I0f=Y`d|-@dzuZ@|Bh1f9WR%e?!{f z5qm+ob@_CaEiJH05@!OZmV{EkmnkhR1tmB&_`p{}QgU^y=?N)_3gH=q&upAN?Cj_N z2S(^lx8VdJ(Fg2RK)q7?Yot3QAOKqGS029XRx4c1JgAVc1%d`ZK1ijkLY=rSCZ=?e z`iGwy;Ax2Iv(v<+UNNgNOW-q^8m;tzbabl!Qe}XsYJkT7nG({pKVE1-5-6Ia`y;1MKPWYX2*$Q^{LrJO^|=|If(X9lJZ%5w=H|mvLsCq>sD%aYSdaua z-gDCsxrZKG2MHihe4*i53Ddplm<2))5DzjF-_3|S`T$ws{=vJ-%KJ3Ihu~Y#vf)WI z|0z#UBY*byD_n{p9jX7u1XpV0wI{M6zy#hDI{ zFzgLD@{ow~1*JmFcZJ=wkUmAD1%R9yi2Jhp^P0dcxWb=ga8(!=d>N)R7{diaU|MwC zCj^qQK*V5Q-o6wQAgUSQ40aQEA26Sv4f6pPdi?oy3w9A?hv5lf6cv8!SBdLp#OC_? zoA<}ratOV!WGa8tQ}A6|P_Q%V4zvHq$SO}mfD#m#{I{%-T4#Z{JzQ1^g+dBo0xoKi z;^YvMgf#-Iz)tc=JP1pq_z-TL6vG>1S{C=3v4`=1E*HftsR$Y=OLQ3YEmBbT_is>b zNUkSv2+~ctVc*OpW`e7j07Jx5P?n>Tv5R`$RF$pFEkOB@P?ty<=t9-Sx{7`O@q-%d zP6+pC>*|)OUhCusK~N+6;Y3|v&CWvJNLVgh1xQUzl`7X5?V4V8gXz4Ussdta@Q-ymV_JY;j+Sqz8pHSe{V;xPSR7)M}AW$ zU|ryH6>WHXrOnA!JHunV&^k1-Au*IUfQJAg$P*Ab&HD1?Mt44)g#&v5g;CGZ!Bv01OS6EwX#>Z!qg?Nx%=v_pl%2b1bWI$b-K~gR;#gp477a zhvIW}lGHd#b-8CnbgzVi2lW8l7m_L|n*_J#fa;Pi42mQnE4<`J8B1-26V(ddPN=fe z05~Qc3MrQEhZLbh5-8kPJ@7k9hi3}UX+o;&{h~-1b@RXc{{Mzwt->U4xM;FedXkxQ)t+>?RgK>~Cj+k)N>@)c}GFh~nDn-}Xr2Zg>10rfi=OYYdVmueit=wMm61x8A{lB~ytuTVIy zlu^8ZkHJKeJ_haHlF2CQ{3&+``SU9`;N&H!c7`F1?nk@Ze=dohUSqD&rXMg#L%8U1WbXKoacl)ztWi zuUI{nUW!@YQ`YGtTv6&Av=H`&cLVtU*KOqGUD%E5n1x&?9Fl9-R?k+;Px(1`OBO_k z{Jj}#fqOq{^8fU*Z_^Uv!S(3iI@W#qU>0mkBTgofRW@kX!)x!?*qF1Y-AhtFJL6{Z z^quVB?^Ts%j!CaH_Xr2(PW;PJ(#>A}=@OpNssiC(Ig0pHzE-lj`R!wZiqutPmSs0% z*-T|CH*my_Jt_l#X{NcB?BJ$KVYA>!q|6@Mu6M>`IY(NQm)6y;{oiZH1JkcHHkNk;_qSjYFjmYR)~v93+3Qm3c2Soigg+P^TF+INoMc96EFZ z-iGd(X?Kb-Kc)Vjc+N5W-KF(d2Q*s5axi%Ott_Wm_vw7QE~m7l*uq)U>PEs&h7*s| 
zpJBsQY^kw~tdmvjmguSH-A8_<3@%NwVksY2_XvABxn4sbj6|%rQ5GaIo6v3V(YIF2(2X^TWn@6GW%OsZ)XH4`)}zH*X)Rg6n4}BwE+#JlSCc=x8 z*x|*)ekkG$t`a$*{*o|yWx0?(uN6;3cZe=*_d=d$ zT!x3ci>2}(_@p7enTZ{|rnvu=deE78(01at@qUu8xq7R~!{3zX^&bauX_0}8vYxww zJLj3k7jcEFtJs6s^3~6N>3cM?e?~ROA57yWl}!o5w&J}4E=25G9yY?>OoYVmK-@8W za9v=_LSJm*<~vjW!Cl`2JkiKTJf0fK37_Asn5VO3?(?z^gZ8#1Iu!+GH`S)bO8uzp z*%Vpc!3N#NI-T9`CSo{(Cn$M3ny9*$V^2GW>Y2V77)dl8oAZy8bBt@M;*#49QtI|g z(stb1+y8VjOZeFZV~nIZ@mbnq598`y*ZO!fwCiiTsS~NYvbfQcDKi-U(FeF!%=7(D z>H6GFwJ#0m6>P^|ua&{?1vu>W(og3LzCc^A*$So%_wVSf%MtqA46y5(bzMKO zy%vnb?LU+$gcO}R6w!21snD6CR)x(wqN%F2I{(x*VLH^MFkk$BrCWcstWPRtH-{6w z|CJe;V_X^Xt4f&Pu;oF@AUY5?i449gfO!Zv5MQOhQ0ibP&M^k`$IL0@4{(Doq|nR1 z`}WidCPc~ zRmd^h+DK)Q;wYbD# zN}A@}?E8!f>eY0gDcgxi-91@(jre0JCjG$w3)8(y$;Bx5mGyLWeaE^D4d;|dy?9ZH zZ2{IRX>zvg1EE~PRsk*O)6!p^7W>C^3bmiB!+gmF+*D}VTn P6$oVojoSrs=1=|?qxR4n literal 0 HcmV?d00001 diff --git a/Project2-Stream-Compaction/img/perceptron.png b/Project2-Stream-Compaction/img/perceptron.png new file mode 100644 index 0000000000000000000000000000000000000000..d6ff0890fc26a032eea0a49145ff06bc94653107 GIT binary patch literal 75183 zcmYgYc{r5o`&X$HA$yvTgu_^pBv~SqvZU;dHDup+vW5^sQG^ffl|P zgYZ8yaS3WvRKZkg%8EKZ{j-CU?nIk9{_Ug6{MW=E-sO8?RX;Az{MD)V{^UOaeF}cf znS<%mv+u4&J%8!WulNz2+VwOpzaA?>EI;VEQ)vHLL@S1cW~4n_VBqyk9W`8^ii+z5 zu{s|=-PhL_!!EYD6NqFZ4h*RCC-vCq=;}t8%g+)0Tr3YzUde!$d{|+akN@k%Eiu!k zfR$gm^@G-iddXICWAaz@8I`0FDDvxqu^&8*@sc-kw5yC7Yij=15#ov$sLr-cMdFaS zz^`jJD#@>+@?lZOzgFj8@!P?22g%CF{PvN(diCl?fP7jfKhv>N0?rkYeVFpfZK=AN ze(|RxE(Y+wVpXEqlssS_h0;B^G3|L+ZE2(uV25KGzA!z z5{wCte*deMb?4K#TZ`Ep#>c09&nnZBuN&-XojTzoTheQX#bU)w8eVGVM@B~Ok%D%+ zP@*@m!cM=!8AT{x;Cu2+9cg2GbaZrT>Tjdsa%7r1|Fn+^Z$eEAD#V7eIPbqY5V_*9 z0s;aG+lzyfW|B))su8DX%_{5#>|f ziA0ziB(H1)z#9|!Hg$r@sug`ufBfvaWi#DoL}S6O}^j%b|j^zpV4Nq)pGi8wI6ZCM_L)GX#d`5 zAk3_6?L;Xnv8X70D)t=Q)qi=F;qv~oHlJ7MrZFd0m&`<%yC4K`pE6(? 
zgRziCh_cZVJbg1#tokNNgB#`1!;gk1(T=dp9-DFnPRgKE3qae|JiS#Mw!5j=OEl&b~7$=8QNQtD4kd zDa(~&wZS!U{LlW{vXM#8$jof^S<3En_*KVsJiNSb7FGq`SmW=<5Wl~3T;jXEe5#C! zQKD#~Bd$EVXR5tjl|SiNxYC;CMZy0rRWiKL>`R*1-frl%t1U^pmAksSdOTc7maDm| z_FxaWRZ{&lc_UXXkfc^+1Vz+q*XucWM-ZxOb#3jkl+^s3?0?ttw=6UGXJ#cPCgxV| zw}my|jChO%w4D^(;N%MHpJ%;>3y@d1dewhv6k{WqXm#b4`aJy2&OUA%@t0>f{^tXm zT#O0j?=Z))t{x2*4<4N3iYLkL-3k2f{;ygSrOqnr_A2w9#r^*NT~8|QM7R=z#sBK= zOpXZJm{8@IHdb>-(9$qD&o(VBt<2!5RbS-F*q^2_Z~|9O*T(bsJyICwhGe_PqLOUY zJiPbQ?Z1BK=&lcav}+PKo;W6*D(I7eM&ibnWS@Hy|J;iAD`D8OrGzxi{Ax#%Y^iN3 zpW1A+U^K(pjqCe$TO*fjb%`^UJ7~KMKfS)Q>mf6@Cd;(jT?K49kZ@56nY`_;&XsJ1NFga zkGea8Z|@dd<5_&aIr?WW4Z!3Qe(`yDcNI+{x#K0>g`uO)NSgeq!i+HiwM2?$rP`Hd#d@|i`2LNuX*(V;x=k+<26-|Z;=x!C#us%K$sW@Xb%TwLQGBeh|&F(KF0 zLge!Qo`Y{)iK&T6AgS~6pws0Sx1gVWF5mWH`mgXFx*8M2pJFU}C|w`$gNF>1w@InK zuI`^!R-;!Uu2DglNzce&i@|u+7Yp9vO%P-`4uf2qC1di>%KY?pgW^i+c&k_RXm4k& z)UF`<0T9lVeI_zH(iKJJgLUZg$*hWC0yZc2( z8k*2W8u$OcAy$PJSKdCh1QW@0ZA z#d^XoZozyaZOEqJc9!CUzKpvCHk&5Cy^;1`#mKjM$jDu};=49)oYMo99GVg!WV;l8?~lC2_opH*WOkL- z?YSY=q}k&S4sga}&qZhF*^apQ01kNg@CIJ;={`b`^{ME4ceeoc#xh-h~O?u!|ws|nVr^29UsN%RJ5~u{_dJ&)^iDO2EGrjfu=T7-WD~YHB4l2u3H| zeC$QTMRz2p=To&Kf;(926Duq_Y&fA8F)_34!h1oBnjw@rb*}5X_rUPGhfHy=9mZnm_is0eS3ZqHIzm;ott8nv zCNkLhkn?@ct9fg_-mfvE!!kq8U-e=11J$D+#0-)zanNzbw)gb(%n(Xpi1H@<=lB)1UeJ$w8{Vp+WWv+#P>(wm2O#eWb4Lxby(kz^TpP0arvkw#^mc7e8<`!rY^N z(5)6F^nx=sf`{gn`b2!t_PI{hk{OG44CHa~vn+``QRA>$a+~9k!Sno<)j=x$*`BNKL{hn+Cq@K_Vb$(_VYDSTHAU$;@_++Ad=E4SP68dzH z4U3~WrEGsT`!)`F$aIRU!}Hb=iW`G@HtkVpo61A;lFd_qBVuX$P+YrQ6dMJ(L3zhx z(P(s;)9L+hp*wNq>&%L3PEWp8=5JYN4pkn;ZqG;L?i)SkF2JG!T_6f$k(y`zX*A=p zTxJ)#xtTi#qoW&(vG~NA87OW{z>uMt!Gkt(zDka{hCuxosZ^C z4}pGwbxpRKu5%(vb&(SJMwYI;lf$|m2~&hQNtnS;KOd`!jXjasm2b;QPftP8D4cHT zd{OC-YB{d>w9W|+8Q?NYKCf5nuYFkmM%Ts>j|GszFXwRG!#=B(%;$Un=zDp6cS0K` z!|0gO+DC+$ILS`EA~Ht!W-i1+nmSudMV~sE&?a_R@-Tk56)a{)nIbsc zZKY2W6a4g_MBY34OC$7(bo;d}rP6FulQr|#R#&ro1XX|SS4hQsWRb0rd?p%cUuMt{ zxZ@|zA^T|4!`=NJUNU3SwRt7{W+F>G_So4n1G>%(=N6bT9x}alD*%!E9LkaPRaGx; zN(oox5!YYb5)=;#Kye%dXd8 
zo9(i&$76E}3;EQt1*?sU?W+8G9~_V?Z4y*uRLWwEYfDj$9E0asixP$D?v=*K6szi9 zi#SvD;jB`nkB@#3t{j2QN@FnFgVs}{kHL&B(S$P6P}9@XQ!AQr@TcGohbaPeAIzQZi!d+A)9*Q5-WOdm zL)W&x6Bswv9XOJwo`0|IupJ00k|)qdO+&||Jua5GNWZh~9~v^@Pl8d$aSoRI*~5xI z2)p^wR&|dJ5Dkn)$hkvQRGEgTBVjEV45r?Dp|IBu=yJj6@7(}_&7)XX0LeK$-JY=D zDv-C)_?Glj0}o4P`l~y|pU%|Dc10?k#Y={@n8{~?+#oFvOOLtku9JR1;GWZEn z6kMJyNGrxI*rhAoaf|J6ceipS5cJcc4@X>Hme5=1lYtJGlrf=x(5V*XRAp4{cso(~ zM%mT$xc8yjfV$TI9sOn_8K{l=_##b26p16xa!?bdJvMub^z-xc0a|j#=H2~EE%80O z$N+bqE!?1U^A0?8!DtXBj-8zXY*eC=+IH(KsAeePp@%)lzpw zjCo=v&w^$EwO9zo-S*QRjC#WC<(u$HIZy> z6QY@3nyY?|fGF#hpIG(X5v&9fTw4pkN>^6}_=5p6etLOyG&Ii5C|MsWg4^K9QO9l1BWh3$({z%HIVx2L%RddKVn0J{YAOI_djGp;n%P zH%y_$8qp%2#apFXgA%w(bQTj6ljG9jPwE(V1E5);a~zPKMe!m?1V*r~UC#VTYWa9* z+#%O^(lrH`j;Uz7v>&4SfMsLP?1B>&H-r8ePAR87#5IBc^R2sSHEDp%6FeTrkd3L!(%1r)sBk>=JxUaKW3!$Rt1-db+k5YKN;3k1Ky^Y7p{mIP z`EFrvcOhuKjoT)zp?PP{IVB|ph`_^#lVb3Fk_5DzDr&lI&PI>A)q=DQnlV{zlavv- zrK=lY(v0&o#+9Q8RvZzUuiG!w=3y*As#sc`yB{}Jtp_mSUIB}BIFHf0=^j*8P%A>LwV`$W2NAGmNHMhO`tFo6^*zr>16vjADjnw^6lS!!*CIFUoXw30&sj3B zc1+8&B}&P)@TuM6W0KE=-C`y#h?9U4A$tJtj;SS%E=fD)veO;0y)K6}jl;-I^g_?9em0f0GAvwsOzg6H(k1>-P9npa(u6@5<` zl_D=m$F(hDN^>w16M19OT=A23tS1`czw!}0A4$9dnXAu1RqG;n4f9q&NKroQ4Fz|IQ`yC&=V z`KXGADUyfTj$v}IU6o0r-z5Y+OlSV2zW#otF58->qKKy;Jp}E{n1XQD-rnBQ62e3C zBbS}RF}T=e?m=&}t^m9y$Hic5FN~AGmB0L`#N7cp$++8J(b?I894$Ty`tW=~hAHa( z+SNszt0wy{-$b&+SHmfKYdlRC31({b9B5{B%|e4h-co5+60hzEJ}y8$e%4{>Mp4Uh zdZqxLNF&-WWFfGp=#GG?(FVj2u(UV_Xg-v7@p)nGTo{Fbe|S(=^s`1sh?VY$t}fuP zoxn`_5GYvyi(!fgUL~3{-(apLTY)iyVB{2{_&xxP=ZI+7i1gt~AasDf0PPP!pA)pN z-pGo2NIlOMgzw|*96#wxtUeSH%%}G9gr^D%<|dzop`IishQ9E*IS2Sh%gf8#+X0fC z;cX)nW?$atAjqP$ufqgmaSLlfE4}(Ks^v4U#*G=b44%y`{I$CqD4&_acZ_zxcILyh zhY^x(Z>D*-sMk&ZL~V0tw)%&u)cer74*>*8_^E8Ok8&k}>XbJ_JzZ*_NQw~e@n zbGRVbV^w=fPSc!)Zoe`^a2{%aCFpYi(g#YjWksKv_>(@1eHC{v*2+*VaY};Dm=M`! 
zUS*Vf;p8mwaVxW-4W>YNgXz+t1r74wOJmN{Q$WXYy_6$ z;DqSvR=(et4%g4%#>4_|%sYH# z1*p=Gd9767Pq}6b=G`q&=4}zD^Ac30h{QERa@fMGjF~!M@>5eb(uiu}95A^E^Qq;{ z8*cZfNgJU0=K!E$@zc4^R_zEcVCc16CuNY&Evz!@~&r zY?*;{-1W7m-y}7Refh90a1#i`kLu3y*|9!;T=g8hlYeYl%|;k5~nhac z{CtE`)+KsvkZbQ2H25ry&dz?Ro&zcBWB5wRR$y7)!QY|5{uLaj37)cX!(t1{8mif$6@}uxO>0$U=7{spDU_+IPKnoz@qb zqP|#P#Fa}s6+7ueZ@^gCZ)bZBOU9T`)GK6+)?mi8v?vO)wEnTPf{8zud~9-@ z43iy+PH19kUTsM|O4K*QG(QB)zcT=-z{riqRs<$=l)vk_CC>4d^yXm5hX*rEPbeGG z<>AzsY95-bJAlFq3Wadx;A%AgB?}ooX;T@8A0YPnE}lPIN;Z?)>Co& zZzqDd;D5agDICok%eH)K&u=@pvQz~c)ZE_$F(Z&J(CCy-Yp|)ec03lu_8Ume%6;ZI z($?J7&0lC)-(;}`8ekKx?}&z zt;N`;0y81ZkjH3ge&H{8Y~R0sZ`+9iKr%65EzUtleXy`leoE^GVFgxYVXc0!#GIqd zz`@>L7(qYQpt9dZypg!nu1O$RU@M|TK`<@^QxgaugGci7!ovIa?^iokAh+JIpXG?S zz(EJ@aIqe_OsLPjxHa7rH*9tf1gEu+S7^fMSZ-RLAZ=)Y2tc zZW`(-Zs|BM4MCj>(G-Th0Doi}5H$-ns8Uws$^^)Wxp%L~$vH};Wedvt{7CY6OHWS^+Cm2r`}^;*9Imx!tSz~%|Lba8Tj}E6cG;)4llMs? z-?7kZlat*YD)2>~%sa?-9Su`7CUAFWKWr1v!~fOE3hkoNB;HGEU}#7)AHT46Hl*rb z>9~>^Xl{24kSRD2_;KS6CSv>F+YE^lisn^gLYG4lB&Wxs>=|Fj!rJY`f0umbH-3Z{ z76Rp{MQJ9FN0v_+5>`N2^YYR|vI)O9-bI2Cf}E@@+?Pi-CwO<_bPpss(_g>do^uX@ z)ePFiXYwZOLsD4>y0(VBx5MS255i!fIlB`$4A5NOW6gJ6DeT82`v>L$BOZ6F=zDuTe6KJ;cK_?^R6B~X#X#ZPRRM<+WRa1T z9T*&Z#CS1>>-!Eo_e!KNJjS^>XrD@1i)jqS=IJ#h_8h9s!$CWnD@3$27yvTqaewC7 zIdZ7r11G2vu-$Zo4j()~(Z{3_P3Vg`D1s* zsL)Sgp&znPuuqHiZs*=DM-CHh%sC>+2s%1jQ0=Z%8dx;hf)-r=^t&S-N~M7s?Y%Jc zXl>AYCr|+-fSnZr1PRhLW3tynVlyaU|7TYAmNFFQLNKe!6(E6KlXWG;>PHI07mW{J z&FoTm&$dbri5QH+k7}?-#W|i^UE!b`4Y==5mht3XsRKu)=(C{0cn(seZ@}n*-Z2j; z$irvxK-g;ioBQ3OY%%^Vz$WD)Ire#AeEDh%jKb2{%fV#sDjo$ zS(vWvP3`D7E@fe;5$`FNuK7^nm4H@8fZ3yc=>3jWn`OE#2KtM191l$*UoGH^fuh5B zKq<4c2FaN6*+=%TfT)8M3Y1`GMdmCYf*x1?y3^VE?ThXL>2Jn_KWbYZwKm}ejsXyzx}ghwd< zCV9l=Tz-p#)P2#E=xjj7kWT|$rQn~HNCHKhHmm&Y?v+HZY=pPa* zmpdm(&<{aU2aSzv=kx=}*tbdx3#H}bAuJVPz9`#_Mz8hin?j(=LFzWaalhbmDCscJ z_ae+cl{;{#lD*T|$&LC(f4t{lIg-;;dS2AIeX4vA23Jw<=W(~(yC9mT<^h=kK>;Ni zsmnGqSBQgfNXg8%*a)bQLA;@^u3@smFBZz|2wvffwvglmz68B_Z+Cd_bUZdwA*iXj 
zd4aU?^2JqOeTv^pcO>i@&-=2nEZhDwT)YwHfIq>0XlMWd3RwD!Td<#FV`J6%te z&8~`Pi!lFDEkbh;>spS4)^&{s01migkG16~6=R@Pj(jL_iX4jC?dVV|*27DRyiz|d z>|OQKycPw*Y(IgQEoRWM5_S|^8x%|Md9C_fq#J5$)wUeZ>A#cfZ-#FTfgk{-fz5;& zR_YOVBh3Dq`7x3A?6s&Z{Z~$#G|F}X=IlUUJN_ASWg58E1NA?Sh)ELe|Voh%k|9J96Jkb7E%w= z2njlOU*C>$WV&*ozyjp8plt8gw!NSOIM(~wN0yEHELZ#jgpQn@hs~!#?}I}M;=7Iz zME;=s-4WRGca{F&kXNNMZMv0v7s&?Opp=lrH=P&w*M5bjmmvYp z$#TVK3$~bbm%qEl;}S`G%ph5pC48uTaOAO|HVbpq$NBjZ!RS7RWYO(wB?kUrmrX>c zh?!<&Wb{`<3Jx4lFE1}A%gX%#(sx7`36QAva~sK9&hx$*-Uw{9Bi0od7Oouao1#U0 zJiN=ME7{de07$@(AoqIe5GTWCT3&hx<`!_ z>`l)voz}3WU8L1%vPhv*eqmyl#h!RILz*)>`$zR7+Ou{rEb{Rb#(biU=13TnLdIin z;!A{+fC{l%4nYq=Ot79LIqOP;80US%V=^gC8LJUv7Xf)L_pZka4Er1pCCAt{vIs*V&AAD5FzADLN#s9)4(ypAKa~Q+ zGXm`__byntpzZ)uzm0saSQ*^g8!3HHBK=uMBJ#{=cd7W}geU)=lNVe1{Wu>(g*^_I zhI6_8oU@pyC^(6doFbsXC|fkHZF)Spc&_5YslDu)C(8dknK*9}=i>eH0?;dE-jbPW zJy7~^TvMwWrt=VaBuHv#v&dYX$2E2X?~(UTYTqzd5w`Gp01vN;gl znH33P(`C=n3B^aX>~6;B;e{ZGJwg5$KrjGzuwv)t9G#rNm|ZUQk*y6?HHUb7v zMbPhAqrdV)4A{E)^cO0?lPR}>(8ksK)t;u{W{YCJGZ7M(t+vcx98b19x}IiamLDN0P{_j-5f?jZ@mgl|`WJ1Oz= zUtqU9%akAxkm7N|f}C2~ws!Sx{uuE0KnWKD3?H1E&nPhL8Pfb7eZ`;4&*;n*^k<=(96xsHl@XGA=Y-~z| znV#_Jgerx{pCWUu=}7KC4!R?fI-oRo$bep#ApCtV&Z(-)A@?qb+Dkqt7M1?S$L24I zeT%907j6dunDwf!b`*}zE-cIzEJx;ueQ`!TePYk};8xys2}IHbV~2-KNhOUHQL)#l zsa$kXb%Rd%_*x;+p%)D7DMhuHD^E%^wn6tQEG%)=&BjywtDVe^Z=7YVx>^WZ?`XXG}fv?Z}ZN&<#x^vOIbFSnZ&fe}8ci=iS5P;Ai zX$yW%aSB+{77*Am^(zE@9Hcvl?I&9`nv_C5jJjzRiTmXt0~!Mi8uA&6Bz1lWrJ<{< zb#xnd>}k+aS*?mZv?sX`9=`u|2@B(E1Ey~t^ z^%j9&tycubG8m<2==QcpA)lBnm@NpC8*FUgp+GVK>#q0_YO;nNO+X`*uurz~7hY~p zFR5DpLcG)_4zUqXF08IF(gge-x(AWA6kLZTC>rP0z!LPL`iNb-30W=9SeUZ~IzkY4 z4KyX2)O8^Ic34h5#}To`8WynmV^zoCh|c!z)&dM=xTuX}x#N9CYPG~?{m`)S(YHZ< z2xEo41p0&|XU~vbBH!RYI!R{YfF=JH{0wn`VvA&}9&Os^@9R%+6c&Pt47I9slN@Is zCn?1BIY20zNFMf(3)rc&QOqYmH?gad;f#fx(wX0_`KvxtbtXbDAk}%B`1WREB?QKo zr|;FB{bAf#v=YSzJPE?oDAenWj8#85$hXl)ir_fuBRLqUse-lQ*z_^E#5p){VJ2?F zaimQgl1Ectd@?2>z)_$B1RFMr#H((Rs5`Cm&&X*k3AUJ?OH4>yf3=1_q1#q_IfnaE zxg`d}}{P;;Vl 
zZKwoVOKpE|3lO#vf3TrstFg3ToCz!x1%_ zHg^R|bb1$`XFK#ySgxmvH)3yvP`Y|%Pk-+yRq&;DS45+CgODf~F?qHCp%fXDapNbN z*kCyDsqmiE)qMzJMz_^RNdPg|xo<1Haa#@9WnCbo9M!uZ$CYg5Nj?Y#wORsaM4pyJ z*M7|hR~=x4cL?{Tf6P*?Ay`R?heJaF@{OR!T9mm+&yqIYcg&ts%pU;%bJd6T=ky7)@Xe9~_F9iK%~7T&|NXHUO4Mt&O&2mO8(*1XIE zBAhJ<^ulRjJ>2ei`h)A2pJw+!WDE>%d;ZKW>~`DrzeKtJKC@P9KXlaX@T2EfDC7e& zXdW^iy&BR8=njMp8=K*$MZ}NxO_IiXMn%0%8xv`ufI$7j_JW`T?5X47?+5m@RcJH2 zQgGwE2~VuRde{tT@=p~0E?@*1`oQ`8=D$IM|9W17-W6%l_t7ir_Ett9)op`}MP`Yf zD5sM%?0Zof4(U^5Bp;jnmGlsNwM3zVbJlgM)egKV%@U|wwPq~^II@@M=2Huc(7Mm^?JBfN@J66o(q zYb^>XU@$pn+$Ii|;N1cbUtiGGl(M>^_kdZ@_=z=+={!633rB&9k&)B6*kYF5enFKPWma`lL7Nn!lvV z4)ebM_qx95rV-MdHGBPu7B7?5I>VlT3(+H8GVQJ2xn3b#$xlApq!(*n*RHq>X3jk0 zOX$@2y?v$c2T%F7ySz$W@C_8I!~`SR2_tT2Z5V9?PpKFr@M#ab)QMht6||Qid|HRg ztvt4}Ky9S_9hBeOeCG5ciEGWenalQXy>w9=t=mPRp2N{_;-MS~T%@h-x&|K3{h*1t zIFZ(oMbD)yr|9Lo4lLEG4_z0*-#;$ltNTiDz3cTIR3X@K%Dc9u#5SqXH0Xh8bKo6dW@i$8KO>11kA z3T`X9w#M#O-ub(JH-=7hbnkAY5O)`4jx#KM`850bb^NC%@-cNnR2LokvXFKZyWF|FOR1_Hl*A%j@ywwppapVxp z*(l}e>T1X;LGNB0uU*-~jBjrbv8E`60RAbf>7HO+riLtBW%}X9vIew0oXKpO&7HJs z9CY0Mt@Y{O=dX7ae(&|ab!5Wxevf1~zxcT9$tC`=v!^&e`s}qZ?dWNI`}f#hz5D%X z$pM~Yzm(&j-F!yPntfBb^_2^QR%i2`dsq3of}DEmv%?>vd1ye@P}m()0CT&{0FEhK zQBZJ}{0p)_gp>Z?GN5!$TCtL`_dt+-m1u?6XmBR z4@WUCLb%Bn>==`a(>6QlhhD{?Cr+_wnx-Ni@+@V-GPK5a2jx*<+STKnua zte1rMRzcJG*g%{N&QNG6>_CDMVi`L>r|0R{R^`qa1sqT1)WA#9wMljsmAqci4;^=d z!^r|kQ6)2AW!^1#l6~{?U(FM@yIopc5ym$JX<9FJ@yWy#+>E|-_u>bihy5|WXxNG^S$*#qSqx*iQ}b&@Zmw@ zA_1~ZsmMt}5qk9zSWtT=edETaSRzcR6G8G=+d8bcK4{HCMwPeml4DEj_6 z;=4Q4S&_do&c()_vA2~|bky@Nh}MJYZ&y{pigktOgJh#U61LV5ax5E@4+*q5txI|l zZCO@^`py4(G@HY)TU&uK`Fs4fj9Ah)=NIV~vUBy1m_xLNy90!lTk3tD~$EbW=L~KZ*wi>c^4RdhFDn zgkp_QoO@srI}r0y9)TyRGz3|$7Dz(FVK|?aU#wlEGK1N|8q^$mcjUgV5h`&Huz7e% zRn7P)=oWQJBMT)w%_~=b?QvZ#cdYNVBYT!?v>NzUa;yiO?q=x*k4`v;xg0UlHatAD z!glT_(tXW$D%#Z zHdAoWORmbufSwRx-Vo%u=p#!x8VR%@vr9`$3veA_CYP{muS#1N>6dj!T3Y%I@zv3T zA@*|0XSqUXm=YR3-d^}!x@R_x)>@kBe3(0|$S+}R5psB7jpU{x%hl?t%=iSkw_Uns 
z$;?35+1R0J4O$osDWmlXa3BUY*XR`WuEo&KIhm*C@z6w=gX=Oj_CRKDck^lriZvxb zeh&~}ypSlJxlikD<;17}0gMXYsGMo@L!wvnumVK#L8~}sSz*}Evp`sj^Z348s_>>r zp}RY~@Ym4L5S+RgELaV6I=-xh2^W3g^?K_rif=kI8h8E8j2|RaM2RvI)DFRC(9>*`+5x+BY#f39cY25qvHBN5`>aKxY-sey;XnR&L{r$rh} zoT1dH4??hDQ|m9E)~zREO1%6RO7q$R*FVoQ-VmtMTsBpY96zR2Izi|-<{^Vjrx9j; z#YXiu#((v6*GY|Y9ejO5{K+b#OIzjFpW8C&?razz4tqSAAWVe`XG>YjIou|Wz=Fg! z=hgM~>yfYnj@a92Gqokc@7^P~Lds%HAsrn)4o8huC5=#Y76I0-34u*xNhPKzB|a9O z1Ft^Gjx0DHa*i-p*4EJlg&i_gJvPevK147Bp`%IOeo4;pKoWbWrnG<3Fu4jPI!W5F zl(?XgG;b}8`+dl!Vkc%a@#a9vnd~7k(NLGY$pHwOEUbYw55=^$Mc0NB8PMWmf-pqz z=@Y2W3J1DKaK>$NLr2IHP__ZGr2Sp#E<>Je6CAD%f>CB%pp&5xwA;K(*!yfRWV)VzyRpy}Rx7#1Zm`-Yl%%9QbEBnzU5MG#wn%{f4&`?MGR1#oHRe0wL z&!n>W(WtxL#JQO*7lqY^FGd&bXQvbDdlLm!pHU^aBR_ujenl+FQi;yyyTw~T@?W&O zXI9qw*X)kMRr9f>9Xq*<4zsc!!#gJ=|2epnZWn}jdK;oqaK0Yw1_#tN;E&*JtUK?m zIzzJ0({rUOXxE*GG5D-sD9<%6si{Z?PWGkDfuS1e_t%c#-gEf-k9}+N6M5&^TSHy; z^TsLm1I!i%U!+9W+=s!B0w%#xw6?X-oSB&kIksnPJvRT0Ed}3soTtUucauMwfrSKH@D9t`WQ-NhnB3&~+J|2!49SI8|&K=P{ z;CqKbNk4%7+vsg|v`Z~2lxI^VSdy{+HnJ-?vAl|0k zx|2eZcJ4d&xaFlZ8~1|;^{-}6a7XAV-g8LB{=TDsbcfp9`566+_HVN7#GM1qFYTyz z4y3$4FE$bW;qY(P`7~s^EEgSiZ?}0`GOuvW!c2&7dQ_TUE$Ff)UzEq;QAG~+wE9b; zpcI**;2^phOQ8bz& zr}g|BxqN6B>nYS;w@8=6H>*ced|l)LPQgN9Q+)Vnex}lUVr=cBmts%DuROheU1WYFxnJRkaRoBZ z_Qzc6@qpaM^ZBHO;jLAK(_2x-wI7GX;dIecT>E$l{jQ=Zx2ob8yExsUAtC7;*lsn~ z65om?#GYtZ`S3ViLX=Onx@?u|^tBrW-cMt=NyFc27&%6;+i*J{!g2(Kz$=)YC7(3| zyx`R=Mr7tuV-l)$MMO5;g^&z{T_N!(%S9UsGAfvyZ8U~FV{YujLO|W&+=SgF742(J zM8(8F`ZW_@H%o5GNnv~6a)@MOhS;n+f6tU&SNB94746$VOvEunyVZMjM9gsBjU-z4 z5ssl&Be{2LNBl`&*DfJ`omxdktITb9Uq;|tkpdHZ;s zgIia?8H?qX4uP9S0UF2VAaDwSR>I01K|Y$+%MQh6k1zzfy)E31tJe2c@=o4@}6y>*@8EtEIHbt=kM$ueSUPs_$Doy2(k$nwN4zW zprD5h zX(KMuoUyGa5$xr`ysF)@Fh^s1j}J8lFINo4h|gXhVF)$n02{ynIYFwmGL+P~NOzI) z127vK8;}Yw)`RRGB*1Uqz8$4J+A^nmPM+jB;VYkT^TSKdP@cbOxkjsGt!)ozNb#KS zfFBqLkdH8bR$0S#=weCw>l3D%-Rak{9y-af*LWDs;H#ozfwK;Ou8~gH+&vgK1_xC0 z@gYc*&5fD$@6+5ztbc5dqz8rCop>)F-&=F-{lUc4H(LK#N`$O~I7Y1q(Ktw&z=0(j 
zNw%19J~cS?AuKF>Q(x8(iG%3a5j2Q=(b+uTtV&-yH;Dzm#ds3DE;KM&v$fVg3VPVd z0}=Jn5Ri)D?vqut!6h5+uKDpb668c<@~>U?0P_@GT& z=fjVeU20uqU#X8ue{~)3k%d!Ak{X~$sPjYAuqS6j6XK|F8tf4y%_t{?;V4RZmYKo9 z|E@ZJCePL`RqB=c=1x;#A&ZJ_sw(4S>{y5)Vv6mN)tkbhnUz*$_^lCX4m#<$`^Ltd zmJ+5mpU;k(6g=`d4+%VVyGdV|AEl+GSc?mvBK-K6IOQBD-$jF~H|c_Drs@(ne6uxFq) zBg~=F6ZyonRO=ax!%liXp&Z&d5(aR36cgiB58E4zgKyIwvRrFEvhhfEdNtaQdlp?% z<)@!DJNKU?ul)SQJ4~O+!X*534LDMePvB1)aRE3tWXrWu{1msg$OBs!iA(CBewz-i zemMMuP>YMxos*!|phS%c=G_N${BzhU;}4NVY&B3}!RE@(hu^Tso4&~hr?WzDZ0THmKB;vM0&9_prMw3jE?E+n)HuCE5YJ3>X?d>fY=@Es)b`IHb; zA|EKWc*(g0cGG#StKvsF*ctwhsWXp@aev>q(xy@&HIkYLgO))^k`_^jqD5(?Qrgq5 zJr$u6TD4LrEn2i`S7Edn6qQPnq+OC0zw4QE&iC~@|DJQGndkX@?)$p0>wSNHa)T>i z1@k6(`~I|HuLtx;?+{`>kL=Ks(_rx&Tz~UOIw>Teljqj*2`-MnR+;6SM8Go`2xNo| zwSIr@+d!K4$uvS^cR<=F^6(ODA$ZR=^7zu0x06*}*LFGy}VU$Dj-Uo(4{ zg??Dti~Psd2J%GK5dSXj*pVofF>;i^l}7IHJ|&5g8+Mnd5rJFSEUjWg&Upx5Ys@3v zXp4}wB@W66kGHS4LYMa%!G7+LIJ&jv3v@UY#QWb77W2GqZF1J83!_YPEVvsGs=!S@&P^9VM z*D)on4j+ETz#&Q;YC91WM<{IKo!tiOfD$(54N<3~8mu_|<{ja8}}_j4PNmT%ir%v}aKQ)}*ek&aHeuiMy;@ zik2TGUK85(rs6MGHI>^VzIBMpl2&3!)aZipe-ixhP>!s$ccv+;3sjXWM5Yb=QsRWWQ{2=Z%a9J$`m*kP&n{ar`(KLvP-^ z`S>xuvA@P?{u0+7yq|c2IG)e2D)Hf6=T}V{glrAcQ^AMJ%bmm04r(0?|FK!^nsTb6 zqhsrk3%FH`*eHzua4TJqCaY?}4?$S=-#1^)&)T9E1sfR9PD3sq2fD2e&iJs!zs67- zQ4T292QaOtURM(b1*)#EWNPImd=89jvTH+i;%e3HLN`$383A@P+dpbBl^!6*7P)K# zhw}QH=og_m``D*j$2YK2jlN7CfZ_L22dnqW440O{srs{hxOWJv^S7iPQ6v005W%4J z95|rIADm%L+A!rK(1Yib@te9sG_bL;0e^>Z>#_NyX}d)ehr{%*557(7o*W+!r)=aA;6-Y(?Vk8)*&=ty|7Ray$QsT5kNK$h#^B*OcA(6(0 zdBFPP$B#n%qbT~v_WlWFyyIlPo_!dOY}zcF*bGDE@H7sW+k$?@3AuYrl#MJ7dqI?_Y=r!GJU1L!e0!1eOp9+O>LS%XMju6Vm}@Bft~= zGlr=#%_}SW11gR=5z73TSheHdk9wk_|Jjm7a7I&V zq{W)BJTYS?dXd9LKDAaooz)pmo$Jza7O3;W8Q~b!aMyjGmeY2<=PB58PzR%=9*1M{2D}XGL`CuQPft(dH-2dzSVPvJe{&-L^F*_JHhFfsxKl&xwsm~AE|+|- zqwMSaA2UC8EA4g7Z3oo_C3VjjzM>Orr5_IefsQckCEWN#;wb$m`%LLN9`-uwPgaa| z%Z=2d0D-ecwDG+5n*~?x8NwNX0E|nyQS8&oq9QPFl=#rk{_n5dzf>6DIn-<7>Nr?n 
z>lfy+DH7ri%dq}4_xJ>2Bk(1`;H61ygI2-3ja?9qhG>mqM)?8IfMkC1b)SehS@~HVnyf8|9nA%9tK6`!|obnm0|KGdp zA&n}4%%R1vL5Dt*w~mHSdg!T8$aVs~(2R5QUfXF20r?h{XE?PK7wR@&*%ND{WIV2Q zE@7l6@NX6cIu5+TVQHtMS?kv(M`r|k^f0%|G>MopH^s9mRXcSpoM`Pn^oFQlaV)cg zZ-gq}TY;9(5tw5AXP+>0cIJ2Vn6SEnJZVOP&@U1pEh(vbPfI;YVZ@$~MNEBVqf>F5 z%|s0y*ejUq+N`)kRkZk%_g`Y`+f?X3c7X|{#;}~KmS%brmHTqU5|^x-|9!ICcxCxG zfMqFX&8)bZH7sY_^I3NBbU#Tmk82_fDS;su_%e_khQVHZWt6zjWUxxAd{ED}&<+H9~ z)0;LTO^!z$5H{j?aFG%6c^o56bB5z;a7wclSj-)Q2iLDi8a&YW;4)iASX!^_ar)0e z7AaFs4=^~5^I%(49ztx%cpU89P~FWj&lqTx;f& zbsL#A!ctZ?^f^NPN5vt`1CXp&1(cH=nVI2-?s--CdU+w$_h4AA(FxLDhWulcnR%1C3{be-*)% zaZgKCiz5(};LJ?l;|mS$z?@`e@~{Ol(6A`cl=x~{HJ{wDm-z2Q8f|!IssA3r!ong0 z>V|IQA7eanjT_31t*Anr!B+8zm4iRYX4-Rg>H_WRz7MSIu1^XJ5#Ulh_YayUZGlCb z2&=uwe$oC5#-M*FJ!99w19aj;;fkb}ITPP7F)CDG9e*~^C) zY6$ks@;XR=sM!2E5cxm034R31cOI_y5Q2_zJP9@MUPJ~jgYv3>0^w909rY-4eSxp# zYeG=mjDF|cudCRv)R+z0Dq$N%Y5mH42g(nq7-R`83;F;3%G`luZv~Ux3BlXVaKSV> z>F5kj?R9DPOli)l>%&V4)T3OYv+&@|TuTkl*8-o2iK{Cg?TLuDajKi$C(IM6 z7-FVqqOGdc8wQ%$tT(k{t&=3RJ{^3<z!5fu|85YwuRD7nabsIODRLtRfE4Vj2u>QmKQe3!^RmtCIe4oRL z;rY!}O)!>U%fA}K$h$W_+k!za|seOd}X&@j?445PX@Qj*vSYX=^G`EUsWPQdLop3 zEWqq^kpZHA2w}sO*pGSW&BBsrOK^K2#vDU~c!U;PMgxx;x?EVTVKb6c{_ijpY=nVY zusUO(Hh0V5#Q|nS!l>LD^qHqWR$HcN5IkOAcqE;j&yo&d#ONUpn~>JvTFXt`Y)dJ= zRE+#cB4}uS>d8wS0j?DdIy8rzVcBq0p4&yg9aF`R^H+tMbw=wp+QYbG0@qZ_yFc~x zAdL4=W)12B>MG!1b8~X=B3sFX+5cD6kt;V+AC`Zxg=1b+R8)oiKB6+bMlugOkx+rt z369x)^76OEtuJ1@XkoGO&XHhcu@JM$$o2UWT=usmXmx#jb$K6b*_IK;ly3Vd@hxG2 z{=E0&e467F_en>I~yZ=2GJ58OL zx&82UQ%PT@JVVSF+}CrYuZwpxGc!@J>Slp%%EPuakQ`Xk3+5uj$Sgtobmzn)>yJpR zV2(ZO{p2(yET2$Q!F)v9USZrKqhY1Qb!m`ADN?ZY@8z=I{7)&A@4rrUl3yMnESMMc z1!(zAmZx-dUR3=_vKaSP*u%Y)KM@aor<8gW*egSv|L-T%4KgF4P>Ih#9;PggX>gPcR`QU(a^`B5ZOE|xTkDNNEC(Mlp~=1&C)(gtsdape z(ci34&?b9-!@vd?V?<+2bTrTxNI|g$;eCF*pHw_kvucC-n-MpaqzDUr@;UB0s3=~& z$~rUsZjbvuruM?mqO`~O3V#K?Q1E{zzXLCdj%0jOjwc)YonVhVa7r4iBqS5ju1$Nj za$kU_OyHWQd{XE6HQoMoeW)jWmGJ8uY38<%LDQQ;{}JbpClXNlkgGZG{BDc171g`Zps-~G#SR1UvI#HVFYsdVGYtC%ZxcN=lo!`oub6bgbP 
zEKsx1uPmg{N=B4;1AhQPl!017LqS8y6rJ{veg8Q0!ha5t2!Ysl%bUWj;@`_GKN6`y z$0mVx*YfDzgfB%Z_rwYMU@NL+d67X+2!?)(an&98*xX!Pe!}()gd=J(gY1mV?}Nhc zOB8k94Y_b0urBU|OIKc`ZUJF3RLx+z80-QCfDvW_nHkPGO$sh|wJ2}%oTm(#qs>b3 z8S|zv#g6Y&1-*R#J%_(*M_I!QB)Bv@Fjh*VXr5Dy5^r>B6&M;WcS)lF*k0f76y6Vu zWXqJht58r)FA|W18yqBwRLzI)E-d)|-RW9i^e_{x^Kt@diJx4PGCy>}4_8@men7-Y zL5A85Pjl8g-`Rl}r1pWZI^gpM6JmIB?&;simw=ZDnuCiA95LeRA3HiAkRWW$ubNuC z`oE8mSJ6oE{0X%maMRZHwZd1zU6=ou5{3l|r2(y)1zhEetyh)-6~WG$PIh0pce}=_ zXY-I?5f)r*_gm0|enHflZ>ij*lfJW4ZVq(Xjnp4bUmX=6vW9M`ds#Jg7*9=RCPGXn zAN};&AC|_VGJP!9mC>!=d^ zoOYN5XJ>2iyH5XF|G)NStmH?3U<~l3sNjkChrO*W3?T6tRd>%kqP)V%R@^x?EBq(N zS=hap6;4|i+@YWRnI1+X2*)En%@4M2ABubilN^QFB`sJ1nq<3h1&v%`^htgT1~~BH zCo~y)yl3?VZrj0|=IgsyT(!_7(}eUm;s;j`+M7r-?Z4+@B+AXEX@dBnx&BvKp4%NN zCIODtxUzw?;0*3_{5CZUTO>|w=&~NHuR{KJ6W85ZBMS((@gwc<@bHw{nz0#`vbTv2 zk#)zeLV>iBfpOnLA15e8kN%SK1rbu1UkMeD$pl~+9~ zjBz*~1A%PT8!a~)X}5Abt_Il#(XQELdAr!e&%#Wns>Py&x}Wdz8C_SEWgb+Lp^3=@ z$^~cu{OJ@Fg{4z#r!`_*23vKrPD?!Pvf!rnvqC@$`3VFm6pWbiF?@C~fxH3pCV$D9=#61%J!6BOBF{XH+F_}e5J=hn zXF)-H3gMTpzkio>jvy#lNBuOBiVQ(SBQhs3CcbS1O66f)epf5lyY`3q`Q;W2MHE z(_dtdN8LwK6e9oVq!FgvIG90$k%~dI6b?KCE#O$Wzs@1NznJ$O zLeW*1a{wrzMi#j_y8TDTE3a1a1NrIdesl_)mnTHuL@x_@TWyR$(x+CqbV zIvSZFg=AiX&*c6|Rj2Z9ssb+(EH~A z7Wy1QX?KXSyJziFCuDFy5jXg7@Ln6y>B~9nJ*gKxfVK}JKvPBtpceE^EK$tVLuaKKGfK z89Xp^M}S*4($}9Q>|Xo*7NITU#eUJyCe!qu%`NML%{t`BQw28ibE1_sY_%^|GJ1CJ zF`4xt+DuMXXjeB5Ic$;4`E*QG3j;rpK2X(dx+UL2Wa)pX-0UHr#W-lzJH!mSmUi(% zUOMaL*bzswY~2kUaExPg6>VYWTA903h~E?ngmdXL5Q1m}Kmk);jks+KJ1e?|@qvit zwQ)718)UC!(aOcQvYnv4^w=6!&pp=OidG8#Z-9G!n0^PpDDf`Z3?>xd%c^aQV&)Co z!kDGV>QTi%oXk%|`+E?AKj_3QgYqp%YoJGeX+G-Kwc{;2`X7Ko2`#pvNDRJ;CMUxk zafWBk1g+o@Vi|U#`t^x;ONJ6E{(fz^ICiDh=X8g)V`fNOE*i8o@S~H)DaZq z$j!A=^P3G*)-|H7!8kM&X8Q=U(|`$iRYb0F?O|yEmXKf@la}K22M#3c^Mo4Li~HOx z%;bTrcbO72xYBmSDISHzWxBHT&#!9$L3RpC8zY>*oqMUta*S`=mwVYlAe`{=N)IGk z#c$$(I|RFEWQ1081_KXU-pkexz_m8C)3z@J*ftn#H=vEx_st z#Sy_JeLY33efP(e-mePp7roBbDwFuqTw7akw`MPM zv8Xa-2e`YrfuzEKb%OdZM1O#^ItD$?T#qg(!z`+x5R!|LCTIS4`z#y2o|6U`l4JZp 
zL*dyfgiPK0VOK`92-AAd@;v0*1%@yj0jt)vL1Z8d3YQe}02sEPpo}8RqzG}C^Ln+A8 zaGRKz2&xjvsK4lMF^85B(&1KNW`VF%&1j?lW z)nG&-O?GS(glM=xyl11t)oJ+%*t@(K%|PtCI@xGU8-J)nzmoON}DD@MtuUQpF) z7)XY1zNtxL+$C4;dFueT0=H4Ky#VR>gInV0@3^+dneRZ)j8f(B0hHGa4{KFVyUfT= zQBLt}P*pH#B)Izi_Uto_Yv`_^a{}$}t#hVoE}50p!p1&)lX8?2owY8PQ1<_Lra zijEcyx93ztzC<{jTe5{$OC*YL$lq&|m6xB#hmEN|Z2%cx1XX<&=VvD;chpfIR`U>N z5K^;P696Y;-lNj0D#D#{Bpn7FV1;Bx^o@-z^##_%IKaFFKz&^vDsOzub*J}h@u#-5 zG=)oOYoB;P*QDH56URv7?Naa5Q>@lnS?^L_3??pI3aw-6KE9D)ouU!PM|t5#J@xtr zTeNB9X^ue4;w`3R4rRW}9`cn~xqymQ@LB(bh{kgsTNDbiq)Ue6F<#eC8dNgy?Lrun*$dvtv+^uP$MQ z2>nP~4z_W5d0bg#IY40n1Y~P(k3M(b5SgI|g@)Jl^~y@!RI`c);R;MWR3aeI@7){F zXNBfvZ!uNl@o9-bC)8}2neZJR00#_|wYX!(QkxEWZzC@p(s>7ZBfZc%9=H+ukm8-G zB5N(TwwjfOh_!k8qU@Yor7~mRsO;-3QSmP981~~;zc%%}aWqq)%?TWHF ze6r&W&RpPBIamP3Vd*q+rD}b)=KhX+2 zUlwM{>hIf+a{aF4Q^kLMoSY4?h6YG0n}sx?$Uu?+JH9dnU*nQdq@v(xLLo#K z+PML5Km5D{b+v3_Si z|25}L|7=vpH$%u>@9NK*3GS@+Wq^!vkJoX2;{tckI^HCkO_5%Rq9~!s#VC~fA?;1z z;OsZnJ%eAWCAeZad1Krz6^9Laels6e5EYX9(k$)Iqn*4#Qc=0uubMi?0B4Hz9pPi) zu<=ez;GNNkMo`<^#f9Y$!Z3J!-o1N=*VLuHKSqEQg1Yuv{SWKa42u{74@rZm&)|JH zl0Jz$8ST7f!Aqwu%!UCK!bUH)pkH4+8M23M^@C@&hPY*johOI*RjE?o+L{_dK{YaB z?Pu0s^`fL}8=1tqaC%D^c(d4iWB&;V8>oVeJk5yP86BN(xlq2oeDO}aRVX!a`~|Z% zyLuEXGbC=T2U2iY`-RC$EI#AFtGQ4w``_6!Mnyhe?r)lgzF$>p_=v%oqbz2M40{sy z*HG`F*AH<-X6uTra;B0>x<}Q-nI|~EgotQ>}VH5j` z=#T6O+o||UzxjUAn@**JEw?kQDY_vgm%GPo6p~RjdE(m4lDc13GZYz)Op(Ukabr6F3l{Xli2v7lhId zG)?gtj|B51%AY>PauKn?&pr>>mzj>|rtpn+ZD22*ta)FNJpQJJ>Pqa}1C|cnckQjE z-J|%MIfE;^u8=6A(9W?Mhf3;|g}Di@Z6T|Q!_S$GlD0TSzz@R2+2s(rKA?orP2@|! z+D2G9;k)_(fP(oBY%s{ME=<47_)YN(z)}%lsoCp=^hbt4w()K$wEsA~6PlnLzI`=s zh9t?~U&U1z8$zPsyA{WU3Zh;5IhZMc84S5#$`Ju~>Raac>SoKGsB~u0seNIyHjDp? 
zDa;Y8`Q$EVw{b=Z3zNa zWWHjEADtzLuH{K!uqT)Jy^N=y2RcdFlP65|Mc=~rbEKS$N~1JcUc}I^>hD~Yh5j!O zd0;2__KbT~^oU=;8mFKdGC!sr%TPYP{9Xnvo8X8mjnove2k>Z-Ra5R^=2kLL)N#Wm zwfM($<+^{y48Vx)3;vqC?4V={6Chr&2oloa!ml&s-O#H{T$b!EtiXukWE<2^hU}zT z_PIP-Aaehjs|Nuu&3ups%0S*DsYTTgkYHXVwS`H%hOK0d{A2 zy1GOy%6zTh{cR$AJfRq6K*c%v=r8%=kTBUOL_`8BYa^1eI94sL@ z!>#Jka%&MPFQ$&? zCsfbm)JO(0Z6eEVR9Cj1f}{DIlOor5lnaAs!Q!ALlvGw$LZ#yjj&$-=gg|Ux44o0+IqtgtiR#fgJT{xOG>`k72y-`+ zVLxb~a9V-UzZ87n=FG<$I`ciH7mh}ipy*--MZ-ZUj8E7#Hpt6zNBOC@KdKJ z!}>SzV^x0%14Y@_7Z*K@mE!6WG@Oj}XPKH}Qa<%274kj}J=rrkrJ2c-Q`I?Ax$yZd z0rWM<{-t2eWQIM3U#7jP0tc5D(0*((!n8OI0A8{)Iqmnd+$8IGkfy1mZ)MF??r#E- zX5Uzq-h7T9z-WRoB?8xMyds723iHA{M_^8_%cEsKMyUc*f4URskwJ$FBRO*^M?fJE zxQ6yF+5#nyx#8#mYI%zGRzOApppBsw<>zAxvb+wnCyYr*++k_IHQ)i@oC9=ntfg7r4(8C+a!Z-o%K$x z3?C-;KdFg#*}G%-Ju{atfwh;_)Z#@-%=Js* ziLYE~Nd;7e&p0*Ui3Y(GXMMZx^}$&eIn48JHCb*{|MMs6DnZ4Ai(kxU8JKvi`SyH0YRezIlV>MMOhM zwf#c>r4+(AJOuZcnmA~X`%8p`ozyP)+k_r}oDU{6^M@V0XT=M0h8fGZVojH?h+P|E ziv0vIzaoy8j9nhYRrT(|SfOX|w;P=!h>cCOo?$IQo}&jR=l($v6+8T(cdWxmMNA)k&f{gY4i-1t1xZH_FF)@ zxRN1O)%9s=@rRg+t26g#w@-r@IXq3iG#fI9rV}V`in17!i!SPoy!zuNOT;#4&^HOf zD|uyaM}6Fy55#cGf=&h5nNt z+pO+W!V+Iww$x)sXBD=+EG{Y>n}g9O!&-SK+nGC(0$d)_z4b0~iH-n}E8Emt4LUGY+;u2^Z!JghU?uaXcR=NXob9zbI@VHQH)p6y9o6*kQYj^! 
zcYUmw-%@Aze0dh|>!(pTb%&5Tx=LZ>ij2m3N{uXXydT9;9Y8o|9`VgeQ1R0;cL z3qG*)pCz%;^7GQCE9+pv!U)oKw;f~;54@X{Fn1j_Je2tCh5`+;*_vne9eygob$4q- zBXCuK>3;vd^~!pyksd1r^Z)nTsd~obIIQ&Jq_|iPejHE$_ zx8%czI17CSY$+gUmq-MotoOdx2EJYEsc-qC7$e1JeCW@^bQzKV`m@9+z6l1P5jQiE z8?GCE&Y&7ZQh+$2<>Sh@*XAGVKQe2ydI{zgca8+CQsyI8p34YfwN!;MucZVMTe5M` zKdR;jqz9bWcqQTacQq1Z=_BACi&M#~uCn~DfeoRzcS&6y@GH~Pu9`Og^cDccb=wPS zZCAo~oTSGmK7K^zVKWCZ z2zctSRBdb~)nsSka|q}-k@5TjkhzPy|Kn}&XFi_ z;8v8fU*^w$?I@2*$NsG)>Iapy1MQ2sL?j=QJx2eV;1+i-x0k7-rX)KFV_?9TqvFd} zkAl44J`8mhOEVYM3qIgz(R;vG%LV9X&)5m0$m0*p{`8qrVV$O0qaDIpvZO(=)3<2@ zn6jM7Z8x)eSrl`EkA;?xKE}er0=5Diwown;htb=jWo&ViG>0INLIakHfozRsFg`x}Z>r&bN+=$y3Yh{i8{T#loC*q9 z0BBbPbEKrCWKGUVz{?NaYgNlJ$gU;FVlhdb(Y1r?scsXKyQ*g@C1~CD#dtM_T{X2e zli4j)0u&YbE=$Wu5bIC)dayGS9~{F+fj=7GQ6Smh3D0TFjiVLDqBjnWxUIc@6n{be zS&@jrbxWBNZud`+?j{&yBL!hb5?6=(i%1)1a9~Y}xO%clHkJ;XQXlGOIZg)EESWEG zS72*~+=T4lh+F6G3FoR5e3c)NF;mg#2OMj05$m@rypnnimA@X76?Q8%#J%-t4*>d+ z3W{3Th%=70DJXn#^ieJ+_}JvO!=;Ni*W-1F+4VykuE2i?6QGjBmN4e6-*8g_%*eL1 z!yI^@ao>LkkME*c@YUI zB|apzy!UC)&GmDT0`N*CU%^{3w*m(&(kS=tZD@@fEBDr*KO=nmp1*#?EdT(ADUl=% zboZy;Cv{fO|Au9Ddlt=Nr{jpDB2eENLBC&v?sWWt-kwskheu_EFo-hKcf;Pzt*d3f z7RB%y-8VHgkr84|Ju97LSuo@%`({YyF58Pq{DqKW9FOI88-{iqrIPqri2XbwSh&u6 zPYw2^tam5{pRQ-d#vMmwwfI#_y>G;4)NnFcza5*Y(F-XjkeNpK`kE|P)#~R(DG{R? 
zMo5{Xh|lB^u!-D^UajG@Zd7I&s#2`)#MpMJC$1*twZgBhPEtXlm26S8$w zbia!a04kJxm(QeAQK5k}_uoFca>USb&> zn13J==;~v4Tof$hE1zS)C)()b7&oq5Lplf#kzkZoKny_8J0d6AkC%nYly=91=I@+AlrvYA)w9u<5!VcdDT&P&`}mpBg7|$ z2}%vBD!D|AF@U4XZ7(k@ba_{Tz`2zS^DAyLE1d$Umv+Pz0{nX4=AJ-r zLhYz65Mzh#VfUb{{l1HVWbC#yrM{qEPYhpq^`NOASdxbO_=J9CTgQiV>nrj7XvvdE zP!ms~MH&?@xz&s=K!}5-*S!bp)-#Z!+numlb%g@3{=Hu$iSZD^C5ti|*m(gUG+t8@ zH?*h>YpOh=ev|UNaaxh>Ox8Sk4$Jjewz7IlBYY6E?RDE32?6f9$GN#GPhao5Fyj1n zJwGRmKO8DbZZ{}lxTE?@Xd`zVg>h2hbcqs=%b2aCu~z~Ao}Rw3<8BB$=eFf#-#a%i zEeJ}@-uk86i071|J=Tyy5KBL~?CYzbj549zYA3^}`!H)%R4f2is2+t1=it&Dohrv= zX%uYHIk?_z(5mKhK(>>WA=hRVI+k-|ehsH&%Fmn%@6*17abrt(ziBOw0pl}gU@b1o z0k>+-eN70K?xClKvy+VsA~#l`^Pw;pH#XgK2Q4`EO(2di_|oFklR zlrW4$=h6gz4X8Zh*_5n!7PVWRzu0k($s}TA9)mT!%Opi2g6NAiLL&1=4`-QuZvuTx zx@z9p*?C7o6B-TBu#*>mK7fwy5t*lehy`R47rEed85fHFd~I6mQQUWJDG$fq3+ROZ zPKf@+Zn~{uXXO&1=R_KVy|L{M=W{6+Bg8i&g|69B(bUKbRYUf!K;dozYZi*I-3j;@ zaM>sOEqX$w8=eldU_I}2>C)Ph_GEzx{2e$#>;kP#o;o4BgI#}rlV}4e`&Y=W$xGJR zdIuK6`)aswzH>3SW@K*CBe6zlg!nb;v2oQ@uv!a7$&r%qk9L zgDY%f^{swSUkeLX%Tuu#$mN-l7#1t+A(r9i3<$NGv-`p7Hl}rDU5yVst9EwT-S&Td z15i`D$VU1-b@3jYOzl__)ht%N)Y}TW_IqVMmfut*K4iuM6nnqgUYeU!)hfIQ*aUj0 zJWt+yK3}P@&kUj4XLIMLoaak6&I8M&A8n?_>Jh8_sU0+{-^4sOZ-TD-23_ zJg%B&_p}>;w=u>KbH0VAya>a>$C6>%@1J4)Z3{LK30|vT9>beXwl7ZTm%TKA{1&2m z`H#&=!i^sc68lRaaUNXnw0G_IkEeodZi~Nsk0lL^wRpHUQWQkqU&qqfj~~&HXzp?v zIr&6dG)ekxT(t!bVA3Od-iwzH^U7U3f4-5*eOgk}#_8f~fxmAiIMp12Ph%A<4yj1T zr_Y{or>#<0rg{Jx+4vzP>5o_~qs1?Nu=ZTYg-o=h#QK;c#htBgNi@P=S6^`klSJfJKEn2LlLx`rv3_y>Jts?$FSdFH0{gi&0(| zOt~FNKbd|qKqLg~83nFs@w0j!CWXM80LZfA5e0gQ|55WxVxqx`-OW?pWWKyz{zhAu zhqrH5G#YQ_HIj-M3p^oUpCe|>i#mn(@BsXd6mr(DIviy#oxe6AjO!w>LT4pOwbG+Ev@upJDxQ2?wtsPxm z^O(}YrIVkZKWQEDQpECavk|LalK9ILXW-p7sdSCDfH%yyDtA~;dsFqSyz?xH{sb zLLp{|V5JElCg*$%v)|h;DuEaaV6j+@ll+|{c*$U*5oWf_mpH{=KSgvfJP?;cp&&xF zjx)ng;BgFx4yw?q|7Ph05?Fz-(0E|c3G0$R{do4nML7c%Or~xYUx`O!iQ>so_y~&( zHgc>_JdWuEcuevF*ZO23jcf_~EITS96!W*`(Y>(={j>*RnvD+en|^$k zI`9Oez>|;Wy=t19c~0B=joTt&sa)DfY;65!=YsU9%wo2;_Z9mT-hROrNYcgIO_r(t 
ztS1FbesPm`*4}{qWzC~ZEE6@Z^qvJuNpI`w7}}janR9vfOY@bM`w$jk zGy>IwqP9|GLBRPGTd{}=S|#57hk8MRD|u}3_?cAsF5i3tV<3l}f#etamjrT^I8}H} ze}}Dt{ZuyAeEgmuytpv>&+NIa`QD)u)pf{XGwhE@*e;jS)J)BUH!?))PwQ2#n&7mD z7`!c)BF308R&`X%zCiECQ5T}eQgVio&&U0X4k*N|n){)0>JlSPlTFbi;$EiGm+>T+ z?eqQca4zhtOL4(VU4rYS;>x+?V%V#JNeClwT`j;-JX1A1U52H6wR!IMkrX#HHU?sM z2pu)TMU}lSv%gqZzb=&n!*F*(W@fD<9{wnY)_p&-L-NFCju`l_^C>m-1#286`zNPS zunUBT@`_@VNN{eX_^dp(@^juewA6A21sm20psw7x983oU*Bb2kN@l%%<)oH2_^J$a zxf~m7K{`8~l|A7X+57RVx1jJr8x~^v~_~bKE*#_#pKchaKTyu-wAVRcAo?Y9S5dWI#&S1-+2Tyl4;gv##NjxM9)P1BXa zC4O=bLjvu?yebwExc)m`wJ3P{h2P_^%zw7Q>q~gat6(!GOy1|cn@S4(lT3>{7vS>~ z78S*uk7@-QjTV3Gl91eGN3KOwzUl4e*!csW6>-4g?rEA#rtn&j*n|>N&hQA0ndV7I z47XM(Yw^46Y7?w(N570~AvSV(|GIhK1L2`43`#{69f?45?~Vfp@FGy@U+=Yzt}oH# zXRX1ku9)y3XVie1(~+iBmS7fZ?HJHkfU$bFJyJK0^$oqwM*0skFJ$Y2U?{hDFXS*( z=TK%EU+l@v4(&ghX8{%A*2{x*nHN?Q@_%dMo)tfPnIz{J6e{$(%WvW{>x-`uFV8Xj zYI=HlI2K@R+Ff+FYTKCf^~-^0Yaf;GzTyc8z!?9LYbTgqHe$D@!*L7)?ylo*o_j7> zE6e=QP*D5?xY3;*G z3#-{;hM7mk;7%+uxRLe{rtOjvjsk!Cv>u^Nr;t*D@44a=Jfu}U4=9`l4OMg750ISv z-C0gg67~qJ`y0f*)vR}aI`kmWgB*@)5w5#CcUI?_R;je0k^zvm=kMa4*IK6y4MC}) zz8I#DHgg=gR-7yI`g`Dapgu7iL_{Wyl6>d>xNS|zXtM90EjQHwF4v%&YLdBiJQ|Ch zKLuluB{~3iRQgGEJ{HF&kn)m?ekl^ae^?IeA1kj`UslxO2Orlp=%|Fi(z^H7juUeW zV=RKd>soc0(oaECgHtu*kb?JbzVv!mC{(cY{r-&=2(j!s6n?OR_R)`5tu_rjM0Wvz z*jmUt!%T#md+tx=v@KOhhmYk&5{%n;UFUywIv~<@&Q%j@SPDMiryYVE7a_9$)!>w= zgvaJl1@jxw-_aM~ld0-40eb*grXQjmpETTE94}mF4W&pbkQ?D1(q9fNQ^n_Zr;Hpw zEwNh-`Mlj#_7V01o)n-LEq#-S1x9$GFI_4($c8{;z5{gFWZFQ+@hlEp6^Rk21>LG( z<2qCeZ#;nBGjq9>%_jRx_>^6Y5b=UG*V`hgzVhu%EoB9_R6BDQ60|an*!U=cUFY}O z*1lOy-^X#l#n)HFQVla8q*ghD6%&1XxR}%3krPJ`i2+PkPfu?9a9NI@;|bR2*M#5f z*N-(N+1*zw?W&#bVpp4o5~{WBnpF3i;pyWQed`+Aq?WkXOS@Ssj%E|7aJcR>cDBhZ zuH9pZR;us2hUE_3X5LWV@I8umGijf^Z1yVD`TvMQa%Klro^4UdVhl&yt9`^%?s zU&nxz83`OWJl}&m?H3Y~Ea* z?y6d7zA*j&^yw37pWODP$&+K3cJ(sZ6nEk(B-W+l9@l@QeT+WeE6O(L+`2kK^Z?t= z=yK;C>{#=z$HoJMbP?smo`$Y2!>xA$J?$g!ElFp;*J<0y<}PGAbjW(*@6r)j6aD>r 
z*ZrE`zQR`jy(twtjS%vOr3F1+h?&3S04C>K@04xZxr#Wc<2a=iBo0+RIr6x2fuV6Bdv-c}_G3CAf zrvIsNN7?!AtW-Rp?Zbx!@7}#zXpjw|zgg24gGO#PJa)u2ifR@i9f&&RQMr!7jWSK( zOJLg_E*P&0;JNx6Be0b0;RdYa^Ngt8?g*=A`!LeFd5^IP>hzl6H=m=UeP8Hd>pjF0 zY}Epa3m*lLd;DzfFY>Q@%CO+cx)wwm=JH4i?PcD;zP-d-!0HNgHYm`z=)CrKK7AlS z>(RP{wUM$HS?s_l_$~6r&pAPq_r5C3hkN%T+J)(@Pl0r#3ffRarA%PKZoXVTNvfVF zqF{h=Kmn+u;%s;Tw+9qzRjp6$tLeFzLEx7pR_*Q~14XAc83>fRkeLr00EttA7O1uC?M%-{9egX05*uz;+}lP8OS6Vw6$cnp zxWf@5h&S(>_#IGCVn#yF)ZKliw>L#M3tg$I79JC{5ZcduG`IQ(f3B@%Zkfu=6cG^- z3c;$FepzhjsaCXhI_1C0x8MU-b}r2I{6#Vx(g$>rlNA21U%ysw6LHvz2gbR(7gHd} ziFhM%DS&4Yt0qpwK2QfLJJzUVZ37?xZ};LqL^BY;_v{&*_B!9h*_8iSH;Z*04|g5- zQif6(6=xT1#{%ih3-B)Sgkq~n`Ob2Z2AAD3Kv#H0XoxhirO-Wc4=qaCsy*Vi4ff+!$eZs?(2YbIMl9~oxZ#z*wVpI;L0C5ov zdSH3B>vUvhI>%zy>1MR#90eL^ILAS}(hBA6@(K!{(A;))$?xAk;3*PBD?-W+iaie1 zYpQu0Zi{ab<}s;v{qifIZ<-Mv@~r}_OA!$^53XN!^v8&&UFvxeIOrL>n@e{9GrZKs z>e*GKDz@1$EIYL}+Fo?q$YL$KLRz6=z|kwzC_V=x=l%E+T$T`vU|V+1e3n({bHP(4fTz>Ws^j_}hrE2v+q( z#Momp;fu-^+6jD75O0ZG(=vHQ%cgRKC1Hs!DZ%0p{4D@3S;w1}Bijs0qlS%Gto$V} zF9ff->pTW=j$;bPE2;pLjr%quwrmNAr^=~#%HVh948~=@`Omgbhoc9eN~&rxvNMqD zx_Bp6t5`d2sAf8eGPcNoUv*hvO6GU`h!Ydrg*Vmsn|tKLT86!2zsJQsSrTvXDrCE{ z9`hY$wBV%Dd$GWKGe{G2%9 z7^_s^l$iFy<|{K2{L0*H4oa^WaeXGc(Xpe~Jmmj*f6i;%so?!Wk;>wd?HrJ`H^aJa z7Hpk$HDXdvC@sZOE9`A6X{zS3GFft{`?N65pEF4^icFt9S0frQdj0g^33=@KCT;%rS!f&!X@8(Rg zy~x-WjysP4_Hmx+=v;9Q3n!&Ks@j+h*TG(gMBw_g5AD(DB;P26_k*bTek0(=-j&i+ zjgSB%`eyd|?TCqE{2kHQ+8d4pzGUSZ;YK89iE;~=ka)H+aqjEo67fnyf4nOZ1zB}- z1+J?Wwivu3DCw}`q`r6ZPP5OPur;8rw=c#ZFLlJ%-X7RNNDbB#Hx#7m(33OzZ6#5^yejrdi^z}!pL$TH0$(dT zrcy4*i=1;TUC$UTKt;;d?Ht+t<)VnZ2Yeo*ucrHCrT91~2(frza9`+zuZ5PM!WwAt z!J)h~bf<3}6=i_wE9Rs_>QON0OC05TsQE@F8;;ityENzsr6h5Boa(ih)jh!TOafFL z@k4kIN5b}#@IeZ^(BH|8Eu90de|>n5Qm)lDf>*;b3u}g%W2scPz5B(BtGIz5))rs6 zg;)*9EaW)$zB~E2d+^gC^r;{dejUh=HByk1Tx_xZD1MUX=PZ$ z;lMmo6s}mh?)APuy`ugC!Euto%%`4VBN2(?J!4@Xg&({gd zGyWbZ4@`IShsyw1g8a);10a%giJzZ->^|<@b{O3Y-9}h(z9qLy?!;jKwX}y6j5p+4 z>Spd!IORAoSQ1w+h&~i_9^r7%xKWXbBFEZkM-@dv87JrE7H^d^-Z3=RmbHye3%=O3 
z{b-%S>+aeZ{&ZG+uYqLuL*Py0$%J18OQ}#;C3Rn#jY+qb6N%`2Ix;njY!t?=n9P8| zgNhii(f)VKLx-tLwi3?qHL6+%wZ3>JRa83(hnR5&D~qp;E85GX$qwTM5>MD6hqMn! z2LRL6*22vcB|hY)kSDHAEDy)-wxt0}X!&~{oaehOfFFe;(ED8VX-KY!IKUx~eI3rF z=%suX=9EWnfH#`VyJ>~WFtyPsEX_MzZ;z^$1npJ+T?V#GY$NkK-Bt;nMI&Hh-+M{I zB(GnGHm&z$Rbk;zI5AcmNZ3m@)>4Vx3!!z`|rY6 z=T~K%i?j)eb}^#qg_+(fCqr3b*h#qSV4d4=A2`BS`?qPI!RQ3{j(XIlTg!g`RC<5* z`%5UF=QrChea`G0!83pbH^e2o4SgTtfZLOcO6D1gKsZ zvoX}?W`h#kcJHu4vcs>$UM&OSx!{;QHv}6f3}o04ga53mC&ps$%BxsDjkal=}E^}HFpH?0a=H2bxXRy=EtsH0d<2{xFzdh*A8)DnEDf|#(EC<*4cVYY6 zmX;}Waw6Krojws4ZT@@Pb^_!^u>D?v)A6*b3LCOMe?GhE7S1@Bzee2tYFd7w-)0sA zUJ_o!+cb-?G!%o_W+~{`S-#^V$FZd-A5hH$J4xZk_OQP=QNbK1p;6+sLcUcUa2^LFo+d z$1u3Ox2%#g+5#@GxMQZK{M1uTt7AsydsRY|#TYhQBBR-IMT&VFLQt+oFyAolkC*1k zbQzh;y~Ef4{f5Va>^h?@fE~tOCQR!eX#+}vcGbWDT|aEohiQ6-Y;JF0PbTQDaO*cW zLy?1@{iYhC$u>qv-ym}iM^|QMob%eZG8zbRx0@M91E8NU5_bn42|$AI%K(;op(rJG zdFVkLr~mxI%&wYne&ZTIm&|c=8)gJa$+WpMfJF zh@JOzbdf}saqd2FLS4|5_KYE&xN1(X^Qn#w5~ny={xOuoLZZpHk)!tKdMrM!APZc> znQ&&GPcdQ71l%H#%eK?lzyR|vl;L;S;j&RAFJZL=D>SGiXIO#;B}VjVH1EF$P!AK8 z9OM;+Z|>kdE&XUMJwpUn36S0TmMJnf_!RHC_?9LAHfVIhZHEIHst{g*hK7*L%*Yu3 zLyk18Hq={x(*vrC7|WqYpTY|rg%9hzgS?N>4jwgM%re(EI^lJ~?RiI|v|rXy(DY#p zp#l7w^Z7rb&ODyVc76M0N+{9_l}rg0%c?x6q)ZhNddRelQ4}&y8Ol^BQ-vrQilQim zWy&12NF}L^8InxNluYk&+dc2EKlW$;u{X=T?(4eF>pYL+dl1eslA5zm{u`MjK7YRSuP6=a#9-Fg+5gXTh?GBa4)2sVhIN3o7G z)BXJ!b`Y5pgR5d2qr$D*aSufW)z*dq-*TKEJewN#Q!dTrwDUsPYQ%htAS=Nz`;XQs zKICs=E+0|*c8uZgbCF~CkY4=|H0KayB{a39>wZmY)1RQ-*{g*VcJlAx{y-1ZJ846I zzBAZb-O^wWPqQFDwlRtquju^F_;zdtLP{u$a1((orLGs3>~41F%%s#*_dZNrVRvgC z+8+LEe%4h@H>brlD&_BDnJ6JQ%Ph-5Gwd{C`zB_8v;XX&+AX=gYg&jry`qvL5QQNj zk?B6TpupmFAt__;EzR!3)ZJfkT>vo&;PBi1&d|yGO$J>VF&X8VW}`=w!*s0$a9VY>AN&Ix&ZYE*gBJ zuTc4pd4lgsdLHhmxLs{&{qa!mcd$Jz-&wOb2Zeyaj+>ASff9a86F7)v)E=B_0-}$w zE7n^5E7dVH^}0W=S03*^HazaIZrA_LpIo)(=a6I-BXkC-wa)wo5~QE6eRtvgAUts} z(4&=Qn+Tz@S7pDz)j|J-^(>u*1(|4hV81{wtk&vj%UP@gd|WhN07jkLr5V;sgZt{f zUx5Rx?pWqtUiB2s-M4Bk^Zfme%}W>s)?S8~*iaIGY`!rFsCZHmq^dvBxv*SSR{T2- 
zrWZOBtA&BmxO04NVbSOq1E}9na4*-R$ZZz8j@ZF?dPk=C%A6u1q*bq3rm(HURz|Z~ zkJNw1o;&dG{(BC(sUae=9OxOCKe?E`;_QQ53B~jUlX8|P%`C^lmxWw7_Lgsu%luTx z4{jdsGCif)rmUy$K=62kNNnJ6Xk%A{>f9{xpjTNZF7jcjdir-X#gOI^XX9)8-**}E zso*i})cISsY{6lCD+6~$AQ!OHIAu=VJap;0L^woqp)Y~cMSVTu;ci;4F!ZXZrmkR7 z?n}j*XO0eS84^HaSx^#QS)-&F5B%RNk4Ir`(9qWJSbK--}EhC+~oWo#kE677cMGe$hd=IGHCQ5|JA!0SM$H<(sEQlOrGu)KNhmwl+#WnRqFHm7W(U zbTk$Q5~QEVc&of;^p&dcx->9*{`*cB?;BF{5C>!KDoHPJsNTNFnf5d@{sZMIHaKYU z;0Yn%mn{$#ehlM;|^HhXVsY+z&l+4l0-WMd1np_yS^|HLdT zrfTotPusq>K@y|h?}SGHRJp{Ay^*dZ9ntNGp-I)T1=0kXoXvmaX7%2d;vc`QlHicM!^Zi$5FrsSoYumqnKknlSP12^Q1fQ1o zuy`79q)UX0sy7E;bjCVH-e#}7dMq^KUJ9Y1mvtmi-#^!xP>`nQH4R)B9Za%!-BO2k z3szW%qXBV}a9ulGsWiW;{D{*XY^;bV@F|01@fp1Bc9;IX>70xn5PKRl%MkFHw^dbD z@88Et(hE;FPBVgA=(KWP{5#%sTYw1?3B8a8oU(P!65(LR@vp@aJ8yQ2<$4NQh-g$Q zDq?|RwZ)1)h*F+KCaEYlKF$3TcJllm&*q!+4$e}A*5Goc9&1=N84V@Zz9w7<08#(^d1EyNOZob%Lyt*y&XAId?K6|nLbH2$E`z6ed%!!YZhcgp z3c;c}_*?6ebMD`&OZ*R^4-O=IMb335qCh6I~NDpF03s{}p@X&MOn=n!i8 zW2)Ga4cGxrG-QcHpI5FB`)jy%iTpYoYf$) zwmZsQ-B>#`;fI3j-WIo$@{#|-Gm#H-EmBFsh+fKE((@OFaTugdIzUt8Q+B-BO@fpN z1L0lbukw=aT=nY$ZWF)Stocxw6z9O?B)j!fQt8NDvwZJk!!A+$0MyiS&q=|oN==_JG%BzOxKOGXZhI~=W zdmNlRyEj_2$@1BLDj1pa!FdPJAI2t!Rz!H`Ft?Ki1DA2}62031$FljxcHyRXe~ zwW6oj-Q8_*<_wVg_qxV*hUcLj@#G0tnCg>FXJuC&39)0ok(GtnA}nY`MFkfZ?qoT| zD@-xIX&b=@3r~}jloaJ&y39c-f*6P|deaACTkcicBShwpFI+z_3H z97CB+EklSRV9K9bPAgUwdHUqZ&+bab&`Pd@=@<=*_cU+zK?iqi21Hb^LKGJ;)#MFt z@O(Ob7qq^aIa4w!3S_I{@64T!FGG+B{%oS9=Yuy53&y{T!hvqu7QtL@4ff?mjQE@Y zoI=fL!g^2rxG5113avi;>KMzTqX~ERkMRA$LaZ1dQur_i!ay2a9=s8?wLp1n4=;XA z{#f8p=bRF=^`?f>oZ@koxlI+e+&-hJ4Pd0Ioc$iL^W)#uy*vGf6<89<$)L8?=KL5r>O(ahX4g@~5*CC9$D%Bi>sSQV8s3m54tMo-aq5iWoigRd-UX5`{tSCfZ z;zP8Bg-sISfq61dMBc$8X@gVxDtFFE|Nbj~-$-h}n8dB4> zkh^;NeV0Z>(5*6h|89;T8zWljPll=_R=aH^^cR=!fjL%f(}abX@f$m65E!t3Z{uNF zncFms@*Ag1zA>^6T}ikTp-;#ayiD~1PYSc2rwrh%p z(uI}MYqd~Ei>jjyvYcc9BhPRy&BT*yQdmt_q3K`JHg)*zmP;_>IJA|-udgQUFWb69 zMhoRBc7_uVA7s6DZihFCZ6^o0i$5^~PUOj#w=y0M{Q+2D%7-E^%)b^B$;pyKC=2Qh 
zq!t)=Ke&$6&ox#R#bYAmOROuouf#rx_Zn9gu3xj#0n-^63<-{hyZeT@f-#=AA{-%e zpgeIg>5}XcPxG-TJlh_=i34_t?uC=XSfn_U3kyAcd^DQfL}F0fqYfppbxaw{vyyk{ zHocnNUl}6q8_iUH!3i-$%c(|~>p^je3-xlA6*?zRD_0YnmpN^+twuKNv-pCFi`SvG zdTA$utyopoe>qm_YW+q<0pDMr$qUZ_fuq&0zUvvI9>aUCoO&^T%9A~^7bYhN>^naW zD*PMS3;RXWz%vKDm65&Oqb3Mc^40& z0B5^)xVqm3+Zo7IO#`?`*7X*xx>FOK|NZ*ny1hP1X3y0xY@#xV9$LTeh)dUk3ZeyA z16zl1AHva`_h@0kkDf=21ueG-o+mac->*N+MQaWE^zVGwSoJe_fR%UaQ_5=2S6sQ= zfxap*?T(Kl6dD!DLp`>8%+IgCFpr38--9j-CVQBg!W4Y(AB(q?$xqNVlqBbR9i-hj zI=I2x{rDSU+LPcrG2IM%(!nG{XobWM?l36xmVurBHp`%wsS$Bur<8qL0r}!maPyXi z6Tx5X$w)L%AO_uv1V9^F(C%Br}$d|!O zYP#@B^;^bENR56EvNl`6toznxd{@)}t|a7mxG*t5#6=(05mQqwXy)zFJPCT-R<@eV z^E9LKzSfuK?jEj8tuN2Gs43Y=mV8QF)HWq))C!i zi_2>Qed&c)Kk+=(w5ke7thvW(vyLt{?e=I@V0&*jL!>c|^KbV;Y>ZIsojH?vGW%f` z9AF9L>cQ=uktGq!txR~S#Ma|(=TTx=KRj2sq3?cH7VMetY`K&z%mHM8b1q&$Oo-CC znk=^K%nll6N?WEwlvmF01ew<@8WN@p`QJ@YRva$xf+7aiHsVeLaszsBHXBQiQ~WLo znMXy?Y?YgX7hShorhHzT;oUu@ zM=KNF32tnbK@%eV-COrNX%bKao=J%p6n65F)mqQLfndz6bi3lVvd($tgk_c913RbG z)d!L)nG52(|Fvr?zy!)}JhJHzrx%-(_C4mw;p{9Na8Pb0Hi|WD#*bJVhY{oDYINvs zX}-?m(9fb33)i>Stte%!Kk&`P38F&Cc)P~T*OHM#LSoYjqSu`@&Y@$F0Ck6X%1|-E z5SE07uCKPs* z5T3}F-pQB6CxQ-l)Jw5*eA2EmWR@pk%$+9jI&T<=`a@~YX%|qD;eU_j)dDmv3z)ph2&Q88*2p}gN(eJVJ#ypJ1}}}T7!GU zwZnTOBMzr?`MfZE^}QI_-6&}kqHK3icHk?aD z{RE~KHn)1^zt_QBPO*2|Z%Sn!rvm zjJqj1Xrg}%kA97P8_+)RJV6cc)*X&R#7r)=vIscqx4IHq#~9PQ9ehm01fnO(@>YMW;Ne5&o8w*+37VH{IXAvNS7XV&lOyCVA53?ff0m9}FU-5)piX_9WkjFT&!gpD5#mRzE zC(y?a^|JJKr0Pf{HIm6>9FU^w7?uapafSdV)HZ@@Rtmpc_VGHTX%afu({}6tK%nWp6MBL-a~DQZ1D%$H#$*555qHzzW-7&}Dzw zh8r0cA(kSRHc@ha>glnPO;R&;Hovcy-GW0r)KUj^8WcKd52}tU? 
z>!&}DJp=Y6KOf^!p_}X2LR58Yim$e<+V^XlIwIu=+i!?6mIMe`%`(ZT0lstQjg(Hj zS8jg3Jo^EJBxh(D4KB{TT9=gR8p2H6(C!>RLV_+H+8R74TbaOcL*E6iKA-G?T8Rp6 z4yKy0=k*7oLHt*7!sRD1L&*v9^!OllBHSFtVvSf#i-l;2Pf?FeG3@_w5I7_l?1|yT zAUg&_6uz2$4=${xIS=t(^bT>Z#KoEj$J-fm^kVB=T~<=Yl-^H2J^jb6Ly&^DU)maT z1uPWTo4VwlKlZ-2{M;+^9;HbDo8z~sAuD-bZu)Z?qjYA59R|8A*Y-zl4Yyi`)Ww$- z?u~?xnDTwipFYfA@ofk@5>=k@v84}N#ui$Zwq$Q*Dk>oq$1~&vmUw9XpLR2>(&viW zcjpZ+6E7bHslXE9con%=FCJs*jJU9RSkW%AfDD>F{z3Lh;e*UcM?wa>UGq!DxMvKX z4>h|{_ts#u2S?bGdl>m;^)s07h%#aMPkEF zPGa*b$cwa0fC~V)fdJJ6@*v)wc|5{#ZEOihgCfn*6p9 z2+x#%B1%?#~)R8Z|Y8drDvCGeAwe6E5w-eDZaNZSkvd8r}FXnnkMd#LNCvxM@G=( zt7fk>9V<;3J#<=%cBqVvW5vR!)U8bu7WX-FNJQq>`sg)+;GFxHbEvQ}(bhQ@TXJG`=gq(uGq*am8fsLMWSzLPfzc?x7# zTx#GIV+0r3J8C}Ca3`KeC)L)72NvCXlTQs|pmTTeKC2$Y&L8-x#xdrj;^*_xp<2V* zl9lA1z?J~bmue^MV?IuPXFVBk&32&AH^SadHGQ=M_r$Y6NC!BWY$IJfwZl$D>7)ApDX*JU!l zp4vqHac^y9h;Tzo5MALG?-dGrC3C3Jc}GWxmO)m0WoXX+)s^_C$ z_O}S8T{--Gk4u5NY|=gNo45a&zr>{Obwyn5go0AJxb^AIh={``PO8wqtS^yiy?f}| zdXj>#&@vN<=k+k8|gF#}9A$Tpunb9HjEXq%p&pNE~Y zZ>Oqm>8RX2A~zdZQlHjV_U`iw{Ti^F!`JIq# z;t<=&V`qARR-;Z8IT3#E^gx9bg%A57(F=a*B(J&Lh1#7qlOZ2owCcT}SyEHyO*@;m z{yjl+DLXZx-?CN^3mw1PvQ%>L&d*(Zp+6PA%2(wDXRI!uHx)epB${--K56wWr{KBC zADsW>s_ZT;{iDqp_on`q+9Q$eRog#g222KPQfu-L@%$1!D7?Oumr$fiA9D=&a}2Xh zC{6zS!FU9J6Ecmfzuwvzk{HfboiNqy^7Qcw*R1@Ovq=2yfs_eyy9{)D;Yx)dio!Ra zx5u7OQ^qekB~80zMf)C@<<78HK30#=$}2klF?R!1F(E&Jg*RNsfP*|??e7s7LGO>X z0jnHs<{2?GjlgHm8KHxnHZEXU5x7)P&{F$#T#ys!b-;Z5C83ARV%}Yti zyvPYzhSdz?vz?2v_X?LH-&=*=SBrOxs$X|BBKzLjjN07z`^>_s;yw}2|J>7F$EIAH zSstat@%vQ#xoj@$!##8v1TIevc7}PTy+u$F*zy8gi6IZt5Z99vumS(vIH+gy68i;oV81Y z@ndgfP)14{Cwa4VwGg_?yg__Y?|;MyG(;6^I9+O2yDoD^lt!bH;Vh6K*(T=b?LhM# z$Eq&LC{Bqd?Rmnj@)gyGTnlWB?v6Gji#;@rr@d=pVWx52b+pz9>gB5TDw(`{JG#U4 z#N!t?H_9w5aK=dvjTn#3T&G6fYZl$KP4bx2msbrh_x5~O9g7`}=E&UnE8b7=vi!@o z%fEcOQdRby+PyfuofO#qO>9NDJ^b&l1n=73UGa6LqfNKkik(aQ129L1@CA-#Lchgz zqZrJ1s@%DiH>C=BA)>#`unw> zFSOrUV35Jj3aIxkyOodKg`^Y<{sc@!c|P0yvY?ARb7$>d1<|G7tBlOWtG~9XO_$^@ 
zX3Wa6-f!;vyvOCM5oWqor#3Fwmuq#MpWYdHNA0xh#Hvc>Z;A+bY+~X4^u1bh!t=-Z zf3zTSn1xQs9i!2Sd1xpQG-+rEYCx>0>CE$AceeChW8$qA>LBmk=d+n=R-Sybo8i64 z%|F^sUuTy@$)hBTgasUVXr2-ABGFxAAWn8opLF40W!yhJY>%v3K8`BQtmQ^<02lp+ z5j&Z(5pmgI#6x{FGlwq9;q-M011ZlFZq8gEm>slVLn)Bq=q;Pzts^dw99782O%VP4 z=10IC#x1K}wpK+2>AmVdU+a9^k`J|;MK5q*XgJ0c1GBHmYaFSC3$*bZ$(^Gp({fdrk zJ&}8T!SC0P4I87*NYEyIJSAv{j(nHNXC6T;%uHp_D(F?R)XHa@?l`(b8sY$9iGH@e z5?4i7zsEghCf>6Yse%uDBismu$^PpRT|Zt*`GJWtPwHh zdRbLBSmU23JE5MnQ1dQw14tWVspWpT{E#~GI)Cw+IKzJqNcSJ{JICQ!pzq~Eb^vWwVVa5*01TMK@VA=%CFz$cTcK@SHnC1Kt15MtpvB-?F zHOKeJn=*M191yv2_cw2S;Nj;xJ-*zk;g7Yv^TFgEbEC(T$%$aj=tIG(r8O6(WJkSs z3yKJbTD;=ltx^f;rFIbejmHq^aI(Z#TCAu<3?(g0`ap)tvtT>of`~R^YKd-l0mT=1{+mI_a(agH=7F zU>MjU80QjChvW<}<&q8-Iv-80IH#~}i#7in11w~fH~#sv&zIYYu_R<+r`C5n zr(I#*Q)oF3xHzSle|g2) zIH69A%O++qteyN$9kNg-smW_YR$A@nAI){Krq=r9Y0pHlr;iHnu6f3@M_MXSl%^TC zRn*1ZApMjJReVZhyV;^*b4x&S!=B*T;rjB2+3Pgetv|kb4Dtkt={EuktwDKgnnXNYW~JRCRA) zmz|i+*-XVWm5CQOrgvg4tmL?Va6+Bn;i8`AYb&sv1Gpb+o>>W1tW%d1*zIc^Mf4P5 zzZ?3}XQ~xa7Bb*H6$n+61lx!5j;bPgruc1nB;j=th6XdI98h9*yUezqy(CFJK>K|~ z`mmLxAQwXzj0k4gEvT_zdn>Kb zx#*@F_ZhUDuFs!!_0a|a06q8iD938BF%XFYGxp*17>Bm_xgQW`a=<^>8?e~41hfcY zQtw)93qVgQdhA7yHdw@G3oYEh-V^kz%IG}2r{K|>@2gZg_Zzas3LP`N^L?$e;=37^ z>~i&nl^ZF+pG8cXb#@&v43DngZRD>i@HBW~ScX&Len0ELHnST%^^cf&mAEW!)3NBX zvY^pZXhQQ=;!v@z#E>9#wZp?m=d*LVby5>k0FWcHTNDJE(7^&|45Z~*-@6G-jQ5|x zkZ}DK)Gh%O8ajq(Uo4FuTs-nn5AdHk!h#O$wKO~V*InNov`^EIJ}5PM)nCvyI8pA)w4hbcwVJH(b*pHeYTU>AO-E>Ryi`g0zWw{>7G}C2s%~$;?a&4t z!#r8X*msp-oIZ)5mlEA06LP$FGN_a!HTBqgu&yv$XkMrH!#st|gH|`KcQPYoLNP~u zf4dj+h0>kd;i|`cOEzg?$b6~z$rGokp!+jH?W>g6-B3n}I;Xx*OF%&=OT*CPq?oGB zGf|$cdGpij7x}6sMt_jQX$m!4*A4CncrOyC-k>-rr@sEIS!Ox2gU9ZeUeV;OMT0&5 zQ&E>@7;fcvg93?O#qxxZi)!&cvULb7HwZ;B|H9OG6SdnjbH5io&w`)J%|W?petjka zt^wp9>}I&b4gABX+rA>|sC2;Wu~Z$}=k5~{(UlgVf$UfBOwF=3HDx?x{s4Qrl+$6P z`A{Ux<7{kGLuG;7M8hh5Nc-xWB^HzUh}RMl7Z_U+I$&{cw{7d@U;4lEy5EaD#tz4% z-gk6##|s`$GOdDjm6yGIOP z&m^;ld;ogCsS#=knIDk8kUCXd9LPP`{KqZ$qOjhRvzqI_cl~-$(J$gCxBBv{U%%Q~ 
zrr)I-P3G5IAEg|1Y+Xs3&Es?0W~yZyu0wgq=sVsyD<+eg`T2| z&;$~hK-@r0(NoKwOl0_=i?MItdzq1-O10Q~nqrKuYVZi$kfYiJ{2X*(((6;^l&Gf2 zaZ@FJJkDVaSzJ#&X>jIUt4PwF#%_jFmGA~*GnT2YxbYQ{Z5%AjN*6ba(|2xnbS}Fl*3Ia<+Dv*PtV^-DZkPL5TYKl=LoaNsFH+!#=c9=YWFPtTaKK0b0 zO$NGdXkt;*0@d!09O3~o%y97$-zs;^6_ba%MyFr2$WnqHw2f5N`xEgWe_jjAnF~@C zH=B>{q*PrR@NO$W_+ok2{f~2!18N3i_rkqGmKYYUI6%v@FCG( zWqLJr$!DLuLJBV?nakrw)K?I8efx$1gXwJzxAP#3<6%Mn>dc>zUokP?QM|RGO!tC} z)&&KDWzYexXC)obnf+^*D)IX9XKljUhL$AcGUHxCMDQYdSxV<$s{QiQgBwhmS+! z{(CP4mc4-0q3+ppy;sgzRDzf@2-U|kzPwTn`DafLI1!kI-kqkvL5KfGEYrlebQ8lL zsHQ@bFln6ffww|yz1X*)Zg^!qFjCxuynxs;+`}#l?H)!-y&rT=?deS zbxgC)39^;?=8Bwfba)#Qa;Ghk7QzJTO$U41mtf%8br zXd1vA5hB);y42Gd#%|;QzB*_0;?c7sWQ4DWrT~9K^jwkDLkYIqf6OmzUdZhII&8njp-q5;1zYEdhga)PMb)KeAxo=JOp02g zwtiC(-NrBu5&>|0ZfP#UN{v|(5~xtrB8W?p(1=Fwm)=1%rKL5i);jb3Pc9)MEIq!P z_5_~4u_q2D&Cl_9$??m9smPzlP3=ld!_&fRtq{Ey3<4Dfe84fMzh?+Mc0(1HVC5Tj zPOGmw1*=kMNC$z<4_8*08IO{tq4jhTyvMA47A?^u;yB_GqU~C^Wg7F0t`-Ao#ZN+mY0KJo|Wu1=TAlDsGab zw$&pw%DeVhSLv_cC{n}~zxIBA^i}o{q585)%GF8>CnSvye}0BZ16v3ZDzMyNzkWT} zSOgXGz$~=mYcFq50}4N`zb)=#TGkH~CiFZ|fC?NyPIH$fT{$L0>R3IZs`4(rX4?jh zR9fkWfCl#tjA#X-fRgT&=coop2r|x(crQzss9V*P*D2P-D{nj4r()Mp`Xp)1QVwobQVLv^mi!|JMK0CDj_8;jS16?qDfcS(^06w4UWb#J=9TXQsFq# zI;A5a8s0NERqNCCn#xSpp>{4P1^#kuD=>K+H{i{rPQ|35Vr0d2IGV&UA2E5bp{Rdd!W2)u7F>7yw4T4&qpz8u9+oU=J}nR|zxX5sWj#TkXS zk|^|H&BN}D`19HxK<#8U)vo%MkzxmtaP^~61U@psxfR*_`$fQ3ZIbb_8dNEIsb-!7 z+=P91fMEcWFJ5#o{dS8&6E5UXkKuf&Lt@d!P+k40ne?YPUo&xN8SM%(}Z-y^WE8eRo$zqB4fJ}_-7@e){|VHNuFy1(U(WfS@Gq%-J^;F z&BQCcEWGURO-z$OrHbz4`wfBcSvB0Qr z6eVEb--C9x|1&`w*+OPX07~j*v|>e3;4B*m?#f*6Te~;hAjYOhM*_Q%5f7n;eVuo; zpQo^LuJNt}aUvcsGlM*rNvS@s)U7siG4bvHUd23G#e6snX&xm9k{Z2|Jx%3ej)J5T zG4ZnZlsq#2zBSiu4+q6PFZJ8gh}w+{&ikEBJt=&8+4{CTkI?V(Mw3XFL@ZIFv=H1_ z-v=wrfPrly9P6UXLd}!9QBi-Y9XY=2pCq zNLq-5_1Xr-JX(rK-5@00a3$;OKZsL0x9opo{pAe7GSUK1Jj4j%aC)V4J8aC;(;m2J z-*e44(A4a1+Weegbz`H$H|kkd7CSbVVP{6AmU@k&bcK_zJ`dUOil^n6t0_OJK=JS$ zM&Tb%kF$a7XKZ-biGL=nO5?w0Efl!Z;?p!_%UHWJb4*H5AVQK}P%!=Q-&Ef&Pt#VJ 
zMr47f#+H~dt~vIjI41);S7V-}dTftD#Un?}$UpOMWwwN=s_MF#7Iuxb6|DJJ2-Z+D zvqiT6>A#DIcx(uV$3i)o4T>Mac1X3jmR4;*LGy!i#g;T8$&x!~+Q#U`P>~Awk1|r> zX<1oNDiNtAMmuhT#MU$Zd%)g5ywsY4$Q`)qM7cptNgeJ*w4kczF zZY=ApJb!ZGXEnt=;b~ABK?X22=V#h_@Xu|YzhA!OgNJWr9xu;_ATt^Fl4Mn0<_N-Z ziO`H^TWYEe+Mo5KMU<6=jIKIo9Fnu1NW-{X{V8!eBPAC(nZ5F+t?(w*Ilma&56npr zB>7jm*8cab(0+4qb8~ZX0mHOg*$@AK+>WRFg(^I_z^TL)D+>1z+NDH#7Z9zl`|T1J z-Q26|_xtY;6naanmugD#0LtBQuLfrftNvVBlm6c#;KE*cHb1N72QsO=6>n+gJbnx@ zmtXPW+qv6V8J0G2)zDVuzlr|L?B13$v?F|XxzLQld9%#OTe@=0!O|AvssB1@3}BJJ?Qr~i|dCNkM+>pS z+B)EE=n&caP28COzUQ;fu|Fz^9YsJd0RRM?7&-0u$C>-{@c&js{X*T$g7>aN3Ibi9 zKK-V73ymQtJk%1QYq-_gZMt2GfH1j_$Ys^nPd&+(wu?#WkW2Z1_r!_ylYDaLo|(2n zDF=~Ht2_2FZ#_vKmH!snOV`q|IZ<`M@9p{}>FBk>Jg7d-|VA6pU2tR71^;XW5Q$oF+j74e*3$z4(00SxIYFk{qX zkrSq?zu|H18UO@jlVE;%J%ol^X0taEGp%Z64OoutlYhHBX#bNdqZXv)Q5EXv91|pB7I{^9@w?#N5=f)tgJMNSW!D8 z92Fmw~CD+uO-HP@Tx#p)k@9d;gUByBnLSvXOk=>)5Mf0&4muz?Lo7;% znkQF%6hcL57Bn{ZmKP)y1c3(dli6L691bM8T_Q5|Vkf_j{J@{#JnJMhgpUZT1y~jE zX6VNVu@(-@vz5gT??n2t1agm(2gc47F{_4yybXe@I*Ss{VW8Buvq>Ld%o#$i+4g;D z5}g+64S_#@So|g)0|shD8ua>zTK|~Oht6FpWoep#kOh*f%he&umu!qmbq|?gUxGbk zXi86C>gMv!>&@$o6J+_Y#o)B@^ub~pT*9Dtz_knL4rC$6Gv#DuF_A}-b)9pe^_K2Q z1-LuYV+ad3c9uAsG3RY^70&IYg?3L3gY5*Q7qSsMF0%>FJKY;69tMBxIgCRb{~$Hw z4pZ=%WJTGw&ZF#jrYj;CJy{Y~g_VY5x)dLOQ)3%gjoe|Vht8q&Y%-2Go;iV5Ay+&S zh@aMAYp_*8V6S$L1z(1tB(nQ*b8ch4!+=!{=YM{*&8@Ttp3ddAQqgk85!KZy zk-hjIkIm7<>QUNxdwUlF%aHgDlLInN5f~@nvp}jRM>WD8q9n*lTo8}|L<&2mM@eHI zgUwj((8ZO>yiVoHB z$Nog|!NKAeU}f>IH8^<^by`V7$=`1i@_oo5g;>$MT90 z$0I9=KyMfd!mY*;gKp4hFaBvQFEEyhvq6XpYyvp0#I6?|@YY#!o-R&2S2^K}jA}nY zmIkRS`4_M6)N0nTRf@f5mx$Jvf`BW(c(_$=E26HsJe8e`wp-AUSnwVMQxuK}Nyq=K zDkc9==Vc*Q$bhdMj_j2@dV|M1YHJlXT2;&85_}wK?)az#25Ul8V~+$94Q848KB@4Y5d{;Kk>M&HZmM>0&?LpQbyd)%p<*cZ_aaalR03GC;=q1_*z!eQUhGj(N6S(M@9V8l0cc3R?*BD7 z_|Ty(S2jvnaWiEsFbytM->%Cib+5k)Cd@mXi>I-CJAWsul&yX7sXj?)X)T$Vi|LX> z+q2INGk&3#hH)WTs=i@dea&Hz%HXIr%LU09^lI=)5b4->`n>BQAzIba{wHE2HK}iA zO2i2$qh%6@{saPVycAyc)*U0fgP{Z7caKn0KB8Ln(n!^ 
z5XOZQE=-l@*~YCL9}vurnXHpfXhEq{c&#|U%S%k6zkW^48G>cm-F<4}3cH#vVtb{e z&^wM5B|gJSYNLriFY#!jqYCDrh&#A2s6FB_hsnW{rL3%Qqm8V;2BU4fPo3I&=Qw*H zS5zHprG---DcYkJ6oic>W;pZn5?5g(&2uZ&Mkq{xE~>|Va4fXU5RiRvI|3mpORWJ5 zt;m1uK}T3m-!DGx2WR0Z7NylWWN>(`QQnw5&=?eLE2X%8g5e*-KPB!VE1n+piZ$ER zd`0MIG*`f?y5tq2>OYFX`2&gRK+|NQ?y^OKrctGYpzVWHM|C z+;9c_9Nc{!Mmk4}ly3kE;&hNGLmC1=OU1=sW7c^twT}5rbO`eA@V{A~6v^sz?Gigf zN%`0uZjhht|GW@nBJxI;{lz$o@O{W=_3jWBJVq7jn035!#GE^RX6|Q(oks}>EqeNR zJJKZ7V@KN;kqZe}fvQM>1KPqyapOP0Wl@hMMb+u+ho}a$@5zmz+si_v%=HQfA{x<1 zVW#;O9KF*kpP2DaQX?9(XY&(}X9mo*b-m8>T`)a-Zro0g+4dbwCYjB;q;0H$CY|bQ zrvKn3A`m4-7g0w{EU=Uye6+nJKWCl1#~uh45j2Wz)5|$$L6tzzwiWQYYFV%%+!Z|o zLXNK4JgJcT*&doLpy${^mLb?q+*Ny+etqpx*8M=Grc}NeS!%d*{EW<|TNwzPKoAqc znx0LSTw!l$D=wbF1lMJ6oD>T7uJfESwm|A}b-a!x1voM!7CcvGuy7fRohj8Rb<_U!hBT)${~u z8?2gH!t<*=kKX&HYySIZbWtRyhXe@?4D50gZ(dFi0*GLrfevX35bpa_GG4z&8Ok~4o0PsZ*~;V*qZu{b?okKJ_z>bQ zj?O1!+jB<~SXoAFkx-J|@}3C9G?ZkC5xiiwJd`XK|m4b=gh7|LvjsrHGgPcoYHTPT2uDPs2&(mzaA-XzaCNMu?;Db~O zF$5Xvfzds?2i-354ZMjN!pCh1KR6By?6dD|f4=~<;p=P0$F<^c{vxo5Xr0Do*m1on z=^tYpUZl7STA2h20d6*0axhP*X&~5OjBQYc9T!2uDH=g3axF81zrzzU8tPfQr^T=apRF8v81vFB)&|f2(Q0-Lz1Woowtl zE5^F52m`LJrG+jcc{a|5$VC}9DF4$^Qu8RLcCQMgQmBCfX5$0)%FAQoyVM&nABEl_ zfRs;G>*{D=fJg-XD&_hZ#i4V!%uT^%!;g8MFE912h0vIm%%+8&2x(x+fVbY+!|;%V z!#8QCZQ^3J;Nnkwug2I$dw#tS*?G^%3{3l(4o~ovbZ@|?iQFfR`v{mSk-8~KfA8++ z8?szLx)f=ORZ|)Q&ELYvj@|Jfh^1IqLS+2+0tdOIM(B({?Jvoh>#hs}1{Dl(siSw~ zswv=y=o7gta@)vncW!*Nw$3@uWaTzSz3tGo!J;uaohW0yJfKaX#B4N~a4T)uo@DOHaCO+BvGRG$VTf#pGfFl@Y$t zegH&QoGQhSjXnn&VmSTITVAeskCMlBUvNu8E-4wwqx#O5mV&bpzl(1Uib6Hr&-T+0 zCQ>`vRxKp~2jE4DQj>?^BWBLO#$s0EiOg6h1o2f2A(FdgT%V&E|5KU0EX zJS_*AP8iQOFnYshesI3}ASptPZ`f&U&uhcD7WX>?vk?*OT{HL^s8lrjWs@-RxNAhM z&^_dzD60Ne3xm)H6L=u6r0qs~#4opQ*mPc{VUzD3|IGTS>P7|Ovx@0wE5)_xd8PUJ zQ`$?D+R#&hW#UrwVFRRTo3611UW{53*uxN}8SOCV;XJO8Yy22A9|RWUNtQf&#`stk zrKPDLwb2Y~0r1$KK2sKfV7xNRrNE~SoEDb0H18(}eIbvd;v zrgc$4^y40m>qkX#F9hS{szI^bl2g}=r2s#N0d9?yKP+u^WpqZ@g{Lrs!?MrI%Nr|- 
zrXMsF>amo@;fxns9tbJ_ZL6&LnX#LoMNhSwkByGzNoGs1!HX)%`I;zfWwda%Q(6Ow zbc1)s^C#R)5e`t>wKy$js|8CspRVTZl67F;9!{rC5+5(n*HOrm!QYNp!4qtI6?DE*90m1BSoL1)+_pF<^AlMGB-!3YZkFMl{GU@PuJL_@mIm9f$K zXe^Y3*ADl>E4J^>dzk}*HcnA1&#jb-N-0@K&*P(cajNRJx3|OQ2{IMhtU#@mbbz#Q z?eO`t(fon`>zPM4jqF5kA7H2Wc@eZx27hKUz%jZ2%hPltoY zSzL^0AOvC}1c8eyNBqC*(O#!+S5t%5DO|T*Tz;YC@JgOR63X&rpI0=@rgNC}5wjwn zAj2|DiwHY=MM3L>zs@pn<9|P6)_YUryW$*7Zk4#cj1)Lvhd=;}+djYZp$_F+Sib=0 zCXZeiVxMR%lYC>|aceFMGro35FB;#--ecP$~J z0pbxLuF+Y3YDhegS}XoKsj5Etris(fZsi~&!g}AG-9%N#9maIL@xu8Orw~iNB{o|J z`^1c#p*Wjy1wf~qI+_3bu$)dI!^IF=fmFv|haM&R(DuvZ5+yF4KKqfm7rpY}BGsMJ zYKHHD)UWlgVzeidl@(Xw#SOAr&j1HGyj49GogHw1ZCP?Q7_;}CmQ@^MYafno~e+YdKuq#Z9V^`2BJ3!HbHJCC4U}b zdIYTI@&LjpuKagT#L{%vVjKJ+J8l|3PcsQm>%QASxr+Sh4u+BYRD#zHJvr zI?}+Y)bGV)_1LT|Tvwnz#K~xxpq9N0ODn@JJPngLCd>!mcsx7MgQ>^m2zJRlNqTsi zVVt|SHv%Oz#e<(qP@SIh)DHxuJwRvs>C;MQDn?t`q&|Hib)R$RvUMc7tworWIswwh z02xgb@Tm26pvR4wVx85l-mr+K4(1l^Y5RlpcI20pK{mJCUoG{RF$aOr@8F%|#4g%5B2R=J zTknps0nZ9++*h$Y;f2*&(gwVqy%SFkd~nAJCynY`>h%Q^{2~l`p|n+xMJeqPLd@bM zm7`AKPaE-h${xbN^l;BDj(r#U^idPwl3r^Ca#VOA(T6bsCj~KWFTZXNM;`*9e;(_7 zo#(WmYAtXG>JVWS;fZ*d6+UW3CQ~w%e(Rfub{Rre(d&@W>Y5Q& z#U!w4B387r?aH|jYS$xoQ*;s2B=EB>$?cEUSyx^X2J`rM=D%$PsfM3t{0QdrqylHo z^-9xv1Ib6UL&r%<-3QLwc20FtIEZUvF!`yVqmqEY&QH!kQc zXGg}yP-J45oNd0K!4({}$g3o1ZQAP4CdR6)--0mK?wbekA)_FIK~Oo4GTgwB7}Gq+ z7m5?LX=2=~)KIcGXD2H|!W#b5Mco4|^7}rV%@@ppT8*R@md5%x+&PTPI_9b5IiSLZ zrr_yEm+*KdWKfySm|(#{>*|bG$JH4(%ha0w;{1!>p2^p(VOxMi7Tm_dE`Jn&2$w){ za#Dh%IBL!`@%!|%jhaTz{NhAb4Pbl1!fUkU@gX^@3Pzy3!pfhXCJF_Q5-nXl^kijJ z+X~!?BonZEUmO&OLd}T)lNVkQ_ELaX-amox*3U@E3h?#ixOt_(Vd?#=hmZh>`*wlk z3AsO@Lj3%QgQw@=Oq-c&IomQ(>LgE<`qIp0tAhMXb9<|2W+ynJbCkvOYhEP|ERSHwt|m4U%V#}UhnHISm!Ou-;R$R{*+ z4^E4qpr<*sy|T{J+d-(%ZfeZFxQk3v_P59^eD!Lz5*JbcvAx<#@vO3qWo^{N zv5mxs@-&nbT2VGeI`Lsb^;Wy14B%eC3sY%<7Rc?JYpS0};yqg)-Y@4bXfC6?e+Ecp z&<>s6KKVK5^i_o>Tr+i87+ub}n*wZ%&K-P`nRos79XJpY$RFA3+qLBOVy!W24SJ=L z8FpVRDBa$Q=!i^sBf8F5NHBOg-VE<}s3oWesMrvOhXyzB-QEI|^SY#d?SMbci0v{Y 
znDC$|T`FdOwAbWzM*vbv&CEteMqtrhMtUN#1iE!M6QrAXGJg{3wndUN=s>vv3+L#0 zsKSZx>vTg&V)Ezc2GdEf?hDA}>wq@<*|JT!%2ST;J{~Aeb`ML3ct)JVYvLj># z%XB3pQJUX!pB)>62MKyzSd9dzuLAQBW@|ASEF6JlWrvim^^R&q3z{?`mWT=4<4XK0 zaML6tSf!iA_j|QH~|ZEu4JYkgC0A@9x=AQU`$y z#J@zOY7qcEe%r9Y=Sy+=m5U}gVv*bBts|h|2x^9@#dZo$pwY$~<`&m7T?h!6f!5OS zw{dY+SZo+EPKRkGQ})&?bSc$y*p`qy{^%9AK9!$KT zE^q%0d4l1$`kbL`7ynmh#L?c}_v1?L5Xye^!iukcixymY`@{yrIW+E;b zw7@I;S}>0`GK=V0XFC03WhJkX+1NB)$kzB?)Af-H@42Tg?85fJ(U_q? zsq$giCIT5*uC|&LAecW3NPqq}Fz_{!fnDJRg3y!jjj&Me-X3?h!W-6=c;v8C*s^u2 zI8P7Z_kvLcPC)-uUn$5PK9`D|T9VPISf8&tEG|on$Zzm-3WQTlTbKTl^iY6MR<~n-^GB*x}_bI%z=a*%Wjh?FwAqx+c?md0#o)dLdn~ z2_;;WWc16AMOZk8Z3eOv)-1IUV3L+tLaXhqUHhtomn-+Zp&90Xmv=-APUD03J&-5E zF+TkHWiY$)!}O50)`20?o301uN;$c?pojJ~=-I%J($UIHEWy#f^g7O9c;6dMY&$1- z{^gm6Y8U(Tj!USV(5FNw#;9dOz*-%5#WH(y${x8$b}KDsYJGQIT%RV~x|5_$n&=Hj z+Z1u221?7EU2k{6(BOwPg{MYMs;IEZa?QskwGaq!7nJa^raPvj}q@tAkQ? zNcci*&5hkK3J3oi*v-(XO>(4v)y~TsXDC)0*?8Md26qu!2Wo#2cjl+?>g($UmEW-q z%jtMiY>wD;eG16f56t5oSK9mI;#5heHFtyr z4|hwGJ0J=&S$g;)S8f}VzS{KUpH+I?a^tyA%@uj96uYmb`HqCU%iP%Qbo@A9Zkqj0 z+3>*iifw_zkA`8r0ZtKeZM81;m40V_Q+gDzAny6E-OU@)IBMZNxkwd>T2qHng)B|e z?`fD+H;A=v8n8n#X^K2dW|H{i%S{2!&*iTc+p#`q*mZ?S)XK>4Xq6%rhv{Nj_2A;g)Zee>QX$VUk>elYQSwVi!oj;u}60-4zUJi(YG~!`^U2W@pwZ0%LZ^(2^A!RR=2-LF^Ds& zd{(kHce$JNyF}&_oT8<{ZE93mtB>MQy1p_GTg!tR;mNEk5E# z#`4Rh>gL@v?{%;8SSAr2u_AP^GjEVCCbXpFj~Fjoi3Yjio}+qfnpFZ^`e=!GKh&y0 zt*y54Zx<%M#<$v#CRg11@~0xS8*ezDPkyr!T(tK?J}o{dKcLijzg>oWGiOa5KhuHX zrJ5d+LV$34Ar+MgKS4N5bq#!o=^e9bsO$06NWM_l2M$n& zdtfSGjqqT}p+^fWQ(RBWl)w~Mf3S7JKq;+8twqz9DJy#d0uHd>c)}f91LwAvz4F5Nuz{`Zs?q^l5B?jO@@^Ja0Bl<2-bP~_VGTVZ{i(a{D zT#^3+&`AMk6u<}^Y9dq`)r5@W?UIstdO;F9U)Aj&vh3O24tLk2Pg)|o=Rhuu$tbT^ zcYkyk(k+fqqnCS=fvJs7?U!g-^2I)!K*sU8L$C7jIuga#yUAe4RPByLyyv!RPh2M@ zaz&nh$$IS7Zu?q7g|B{xQ*$ZIWIYWl>rReI5eB^2U%^pIa6Fg6s;tzJNuBv=?$9-f zQnt7)r}+d&9s(zJWh%Uqpj*%q@eL)u7RQu3Ej1PXMu@*YXNUr93U1yAWsTomZj3#S z`M5m^8B{Y_tf!Ac>ljGIMpRrWvLq3U3aklKI(h@pejTs}xFtLmD=J4;M{+2gNL0YD 
zPKiacrbtT@i9C!Qe%wmI7zZ(I&`RB5DxPb}GQ`tboa3=Ek~1_Lr+cVLB0mvfJXxij zAs17YVzhx8`+8kw$F5sB5MRR28TP2MWRs@9dmfF<0!mL9C_X6IRs?|}Dn+!PfRm4H zMC1e_*Y2v5@T#_z^Ofdi4{6(6hN!}%3B<221mw$d{P6cuS>;wpmaoUvN2y=Dq z6if#vS&OR_kwoKYMP;^N`z;#&;yu%Rz}`51JfX`~^7*e)IuN#4!>6qhOz!T>%2HU) z|Bz%r>Ce3x2ImD1`u_#D0Az8cYXC!#wTFy~5x*F2qKVO`=*7a2@rME<-6dezGvGrM zC^x`VYPWMqdJn}dJGl$U3-tntG5smo0|N?<=fF8bv|0cZx@}!`+1ZB)l?^I$)M;i{ zk%Ni3^w$(+)Nc%^Ca%0hSqd@m>9XLE5IaRNR6!Yv7yCe20Y8+M2;*#h3gLA#ia778 z+f*Xr2`=35)isXiu$&d!V7UZmQQWmqiDN=gJcEo0K(1$M>N-I;B@63e%%tG>8`*1} z$llUi^oFR5e%0V+<~nJMU$gUM?M5$dIG8$!y-z0%D--t_R+h0w*B?&Cvw&jME|V}a z%WrifxYAi5SZc#x=ZKZ%#v90_Q8n`U)C*ZdcC)sQ&H@6{5RA4h{SAj#dJawP)6Dy6 zE4mOSXI0gOXtttZ!pwQ&QW|#EMcsVmygV2chj@FVFA%D6kbJHt6nzwh=#Fn7f}Jxw z;^PD4cW1nJ)nCtHJ9l8&e28vZ18PvzW+$E~VLu!5q9vZ$95`4Yg@%}l|l*u?xvO$GY_;$AnB)SF{{%~S!jJbJhUUe z5JPhU0bx|wX+mQM4;#6Q=*GcN2Re_|6&)3lD5V%y*47Bkg937Ht{gr`tAsgj`d`07 zXoey}o<_;@1D$NI?mSQJIItgBShRomKw^}1ULMG;7f=)&&mO0b6#XH{VIF$@Ipv2* zZPy5l{$=S}CF+7Ue3&B2pQXZ;0iJ>y1mjO63h(X00t^tbUphJtfXsmF zxIMnHYM`u2xgOzc+9^6n15uNd;zSMXygLRL#oaJzkZ+f?U9<(bf;+YN(_;~ha>1zI z!>q5!T3P%NrTgvscf7cPD|0G!woBNrq)P76+>45;1c;sC+uD$bPEWCap0b8?DNMxw{LL z?bl9_LRckaD2_{#z3zd)9KdTF4>Cn!J>8qM&Td%DS%NnN+cqlq52dh@dymOdMjFe_?cl$ z7Yhqa5{}6(X6UsJPRBvb=#6g>KGRS$#R|t%e%l+lo$+_@J%KTtlH!}KbhIiuSrZnE zXi2LC)FTLMg5eCzv2R+satOE67?WwgW8ui!mRw~+gNz-@o?vvjsVOO2p|K1NT^t*W zM~=b71VBkIS64ypogMXYXBA`6m~e&~dO}f|E}-*x-3u>Rb9JlU;yJK7L)I;nu%I4~ zaBQ{CL5vP~YmD&V|R|km9o}Y^`|)R^-Ltbd#mUrJZhg z#v8c<#OQ754wBnJ4LWov@Q<)lfni5Ubr>MH=`0EuP;t~zo*hrW#AiczRYRxZtPaZ5 zX9%QU53)Ii)UeAo)r6BEH5Psc?O(hh`F1dEgoF$ER~IP&$=Kyi1trJ{<>*~bNN!iS z;*ZQnBAq-jafE@QKL*1KsXF8Ms||zc(ZYaF+EtiUOSXvDsAXj>=RuGP4>oT*^^ zJro%cF^)H7lB*mJ|33YWnBA&s)9OJr26lQ)jc zdvtO&tqpH9OqCkeF&BrXrX*K!BOXSd0$au((2y-)?*OE$W0Y+_`{6~t(0m?jKrV=P z5O=_pYt78ohBIrVX_s(R|HBBJjeFxOMrirp{|%s37xZ(0%OqX;fW-uI#h90N znjv38sOAl#s>QD6Y@3m>d8*>H-{Cxt3C8(PTC~eMcEp=2U3+@^jM5|JvKJC>!BdPS zV6$R*V-)%1-GtvHZ^7l=3fN^*bwllPEDslc#Z;-dxOmsyxAQ;6g=+K+ggzWP{Eapv 
zz*BMvittu9NkC5xCw{pZ`$axN-z~>MidYv4yw5-0OR1XR1pQ@aL?o z&ib9Zh;A@2yQlVfGYiphINd;I0obW@ZO=WJ!r$EoMc4@na@#ngHV}KEnC|T82!0a! zZhpyI=BNwgeF24=g_gwAIg`d#H|qA1YMMqB=2xYmLWfmh6UQvXjq&7{ zE6Vk>9ahT#4*DOsWh<*s!Hftto`>N=;*x}LU<8w3#euKUhnaD^6W)CUOEoq&dTPm& ze@knCN(Ir!z!3s8ExaS4r4I&l<;ukpQ z6JE+EKsfgCd19)l?3DP=>cxFFEk06&`XIaFz?HGb>`q&43wx88JBqIuQVEW>EoTol z%?d{m%AHB6`h2jyOKd8kD7jRJ#ocUM+_qI)o4H*EWtC*Xc1nLyk3Zh`E!x^(x5U+< zf`zR%`ZYA)GcgT9>S-KP{!u?SNsJHG5};1Ukg@me7jKNyx&G83KE<+hJv^-W-N4>r zJDQ0}9REP)+~Kuv)Q88%$BBV6%z9h5!?77=THXO(UVpRa&@Q0d+ufl50N0T4%~maG zcy2BTDHb4ZW?&nJ|2TFZXc#gXomZc5)VD|j8kt0vTyY!jEvHtICHtk~ic#H3)#cb$ z=PQGUL)v)zR@bUgwBN|M_UVu%!((adf)c|(Kh8Ivt40mSO?7p(sP3F6g8}`M!WA=% zYlHsIM{IK}5fhDtJ5*X?BNC{EqJK3sHI2?-x1^?~VxLTKgn$HgWB^e%d|(-B`piTK zt!!XJLsVje$dN5@6=ljIeI`qZ{5NN8fWixeA`Cj%^y@VjvVCW(Y`(@!H)vf%Q^yg7 zP15tgso!~b_M;Zpq!`ci%AhVylT(Dks~NSpZt#pbp$`E`{V_C~k8Nl$bbwu}PT)@= z?)U5=0d$(lj_6J?XkW7i!_gOKKDM@6+1grIh_656G54AO-d?2P{rKsov4X&QukKuH zl>o=|RGr|DLlfBNMUr3`ZXLU&j=s$Yq{a^4E~|tI@U}o50C5E6N`5qpm|4Jj0k;Nb zMA$ns6!FM$;8W5w6|*d}^>zQ!r=ZIg+Rd9hT5#=<=}RDwL#65_Z2F~uOR_KiDwl$P zgO9B~1w9Z(nPLJEV6acPQxAqlN1+)Z% zKI57?eMX6}2jXtvUz=p0aOLqy(5Rl0uFQ_B*1EKl$2Lhu!boTwb-4$@^8sTT%_BTP zz(tE9&N@VT0EXj`YfYLEBjS?8VLL*a@ws=nO!rv(i6qOizl%=lQyx*IzJHim8MeAnV^Xy{nKF~+0v(1a2GyvTHbC{0SFKK~ygaR{B$+G{KKMMcxL2A&QG$a1lUcB-`%eWv~c za|8=N+K-|nuX09U;~yAzVYmQf33LYnB>2l?h7B0DxD9Xmk21HmPyU_x$~OM^POBxs zVTKG;t$n|Rg>k%t8+UGl%Cbbbr}+7?ii;0CFh{xj#SaZ6km8t2cd!O_o4iN2I zbiE%vK^{p+cwBJG!<6nPNQ|-sQ7>F^g>k&(6TP@VK^r;ANN^5ON)d%%HU>DRnC*0S zhRjWke*FqT)+0d*4@605es(M1u$%~(M{FQMUg4}EC53s+G9J{C=_P#X*8#L((6s%w zbS9FL8lSW)0R&p}hd18)lj{gnAR=c|6LGA6FS>_ej*x z(3qgnfiZOM5RbkTks~~edv8YJY4L&M*T$}Slcorr0c_H`bUHL2fWja3HJrMQ6FlUP zqLZ*>K|LqBbP0yQ;sU>m7hgb8U<+*IlHqT7mCzP~zHnprr?rgm!0%W(sQH<)c*L+9 z!q7uLCS$0S-cTh^!_Gnc3N0-d&0tDEgR-c*w`;j`lBL+$J}r2lubs#ZD^$Gy>iVUv zoCDu5eM`!}s8~8X?J(7t2#Cpeh(T%4lxJ}8)+;X|I(zE@YNkaqB>OEs7%YLU{acKu zmNFS}Xh?d$?Xqg;&{J!A+22Gjk!~ZJC%%lwUz7ln;Rp(Sq 
zXzl7!O{|{}N$X?59S|hY%HHN1fYh)yDDRQx0=5i|HXS5QfVYSz=^ew1llY8RaMD9O zE|a_ffDH*pTwAXmBQy2l`#XaDfjqwm?Vhb1^v6)gh87CiurBu6fq}K-o_<(`;5l0< zigl{zl27}$_y8P+W{XfM^^cE>DGY58f;*UOU#q%_^ZUoJaMHr=SacGmp7HXdNBFVo zULg;DZVf?_b=F8e>tw1`*Zn?bSB@n+SU0hLtN$2}^|lHl8?Ta^L#3~sR%CqUFa#+Q z36AV(dUPzIDZj;P56O*MaYhyqP;f`Tw;v3?NT4KY8wH8x*ZO{|5afr+zN9 zR=J)ORR;LH3jX4tKOm_h5z7}pT|a=<1r}9e=Q5tK*6OVlGDK&I(_5L^*4ep093&X& zRyan>c~b$j!>kAfWs!#=HpG+-b&kNF=y!jSAjrLM-HY{Y)Yg5~Xxr0Pg#Dqat3~}n z$!`!AbtFpQtIgf(kSXee?)rdER#xuAhZuMv9SS~S#f9!5P!McHYfd}xS!)l4CuL8R zys=3W;$*zFg_#c0LlL?CrXCQ;*XL)tFg3a*Pym-*7tqQ94uE$8bvW!~YI|{FV=00n zwk`J1S{mgfiE&tRslsg-Q)5DbF*|!M*$ZD0m+iyAuwV5YKy+D}sUmcUhlSSHF(^uZ zxvw*|=lWe$F{YF+hu{pq2rZ?9m%)&-uz;8UuDe+1a1A2r0fj3gZj~eB%e3f%89wYW0`F$k!Y&;jL{C3 zS+LB|)CdofxM?>PW8zi<7#JBRuR;MfCZqQzEb~i{4%$p3GYq^^_N=?|(pT!pPKqJ6 z?Ufb?lW25)dUx5KJ=p7x(z>ay)J;C<+!~d7zMjJ4Uw0~Ns5Fix%3Fq(_)QgLK~QZq zw-uWL9pqz#r|!AD9%zeKG~!oAgeMdbrTy&f(Z>e?vBV12G31Sztf#^fSKVuDPMaqj z65&bGx|n=VMeiFovCR2o5B(v1Nsvj};5s3Iu|{NL*+D^^SFW3l%#higugsLi2e_;7 z=>OyKyWN&yGKiAHh#PU!P^Hmk=5>hd0k`288(NfYx_b%h2OTLF4wWd-bYz4B{M`ymeRzg zD#|MLKCgbpyIror*v8bJLd@&f5RclAnnmEuS1WTxvAd4C9mcF|EFR{X0ds+ zy8Y_}_9)&X9(@NZ>VZ$vu;c;P?d-O=*x2vf#K8bI^ow;|+6k*70lWj@hzhByLI}k< z!&`xWc!}=r;~4m$n7gqX1z|@8@!gq0H9~G){ul4otgnkz-?QHCIN40QZX&ZpNJuE|p!{gbzM!+-2PS)8@4R`cI2x_O)v+uh)fg#;z07H-tkLTB?*5)(KuL3_CvR{2&YT zbd{F#n%v)YrI2t)*t8(b@e9X03+HR-%$IwMedpIK{1G}(Dz2%sd14{@VwB)p?$pcb zCPQ)$C({23sq!$83hDq|2*Dry{rv>94PN^3@*s%~BfS@>)8f!SP7C_z$2wvrE`K+f z{}{m2HecgdV)K4QnTpn1ZtG6*njTMV96D)rlE`(%=?dl3#veB=EPP-C^yzgnc6$sn z(1rzs)DowY*x7VHl|UYtb4E~LmgCfvQF(pp#}BpVTFI|LS=(KsXRTMi);NA}TEaeP zgY+d(8u9D^q@gRh7CzW6j%#f|e(7kPOjo&oK}E$p_-usR=g0^~5C}b3os5rvS4Kt1 zp63BMg3W=C_!?DDOu_%I;dS9sNkoj+QslsdOpiHQSXd?aw{fMW&%B<>SJp)_fXpWF zrM$fMA!A7_&#ppSTN{G$3AfFz&H1JG4>1ZtTjh-r!LhZtEQII`svR7v{m(C+s0-+R zS+%!4o6lkTC#m5z4EW2+0w=$B5N8z}x6#jmwE`3ogY{4-?s3%Y@hINQfj;hgL<5CH zFSWXz^~1C0!G|Yd>n_Yr@CC=uZ5g|=xt_b>^^wQ-|B{;f+2#R1J(MoQq3_!jJUKm> 
zhOvNeneDE3IumOhpdFvMiIK48b*eONhoIkgpTmE2w`Pr;nE}ZMM}+rqc3$O21Z>A- zv<=S$IlEc|YRFd}lA+tgQAtmo-uB{T*4erhRLP0C7!-ggg=`z$HC7ORyEt&jDX(RH zOz)2$#1jWSxoc#$v-NwGA9B*z^yeRjCX~rh63h_(ml4%b8!0l)UGjTtttj;(vc6Z=5=a z#s;mX8Om_&9NIDuy`O2#=Y~S#FCROpjpQAVE&VDPPFwam62W8qE{7GST z#n#y#>BHXI)ClS$S1!_z4@b@;#*&YPCbH>Wi6HUaP2L^>q+xF#ML|28I{21znKgwh z7VEx!H1Z(c^AOivQ%EFN$w4=fl_gcIXH6o#k%!yG*~|W%t3kv%UR|)5@EWF{p%Q<# zD$~eJ44BFuK@WpQkx%#+bgok>&N5}VSC>?Xr*I&_k(^%jDMj(uW%MqLwPq;hD;F@r zIX6Fxz>qQ7+3?&h4{WN3{KQS?i^cE{Q@WK$gGPJL7UU7XDQQQ%yh+o{+WYDzD#V%z zQ+P}8b1@N;i$GqLkN&Qt)}>OUr@zN;KqHOhoNZIbUJIKQrr%KEB9VTKmQ#3fXTH`S z2hM9io=&pm+!tYNCP76UCX6c+7j(J*- z@-Jal88slhbjg3qqVbD&OvdMl0Ag7<=klt!IGw_Clr#O%83Px=F=~v;Y4qCr7&KBx5r^pDxq*--?(mq0WbdZoF}D#eb_88_!5rm8hJU zQO~#aoLdn81eA}X-dn{uOOD)#xm*_TM4{4D-FCO!6opws@07EJ;7e8Qk67J1c`% zAB77F=q)ekoXesfEw8B9x@Al3hRUGn{4AwkxA7bVm1zw_*%=R)4tI}Pkw^|p{yC24 fpwVrQ|1U1SRr-8&V6y>*g#V0<_88`Fb71}-Hu26( literal 0 HcmV?d00001 From 288342ff740c06a8931779a995aa7c15ba7cb18e Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 20:12:41 -0700 Subject: [PATCH 38/48] goodperceptron added --- .../img/goodperceptron.png | Bin 0 -> 17135 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 Project2-Character-Recognition/img/goodperceptron.png diff --git a/Project2-Character-Recognition/img/goodperceptron.png b/Project2-Character-Recognition/img/goodperceptron.png new file mode 100644 index 0000000000000000000000000000000000000000..1f7cec6553cf01096100fa238d3d6510cbc106d4 GIT binary patch literal 17135 zcmc(Hc{J2*-1oGLLMUZT$r7RLdlA`%EZLKN-^Z?!G=wB1TS!7k_H_u^Lb4l6)`>AD z`)=Ov)bl*&dCq&@_pkSyw{!0M&dtnRziat^KidU$SL4oUat3k)0&!Yd>9#fkap(a4 zzD#x;{(V)W|2F*RsE3^LJu>(cKxX+IzCP)ybl(Gkpr9rFa|rP^`8)z~5utqh);*tO z{MazkxIb}g-=#BGSy$UGRprYyjGPGHk#FDBlxbff@N)_8Pe#NwsGGg)n4#J-sY4P@ zSE?1pnQF<6D0A}c|2+Deo%xBfz;W8km@oNl1H29eI;+O(rq4ZI^0vi0Dho@emDX^* ze`R02*XO%2ygTfVbFVa}WP3_2_xf019pgp#%k#0zC^Is)r(Bm`ZH2;@H!l5u`qJV% 
z^2k%_ELAN!#XENn*}Ay2;JvZsx8%qwr|IPulY4r!{QUfW&(HTrAmpC!y=vlrI&?ie z;|B(#>f*v5o-tD`E=R_`OaTvPr7*Wu&K09Xed@RtU1(O;6^o+t2)(?xg$4UjXVJL0 zI0N2uh{KN&4<9~!-)m>@;Be?oN=mc2z>k4}XBfO<;b3O({K^KBCX4f2L{UY>^9-)1 za%?oHqlj}6$Fh4-82q>CW%E+Q$NW=5aVnF`K8QDIX`D1Dgn2n8CoiuN@BOfNSRhdi zG2<@&WtcZSLqJAG<03Vp-txwSa?Ay0<|{NP_)+BUHR>x|bmVThfooqU1td}^!y0cr ze0Z5M?8qZmv6_WljqKQ+9X}?@6R>0oTIU%Vdll2x{G^YE94g1CJ2>#bgCO2(b6<)f zTlPu5cU3`%;R+pjOvmhJ4E~;;Uct^D9i#G|5#6cdmEL%U+S*#<3fIRuIoyc1NlD_~ zi|1w1qEE2++*DnGX{cHL#t6??w>Qfl+iAVsn|APY%o!V}Z`y!eeISx_{rsuzJqHv` zaY;!NXIg+9O!#1-nazU-haSCs%TQHS^=CVPNj4~XZq6FWin zF`i0h>rqkBb%cb3#MVjz`g_dfJAeFYD~$ONFgvK*PZ}C-btQ{8#_{MQTq})zYE9`B z9ymKk!5^s_ij2O|DzAw0aleT4bhcB+kL+#|u-n+Q?BZgb-2;pZy7SK;0ghNySC>ZJ zMrk3;Z`-Wkse7PGcqxM8?L(5VtR#QB+if1;0W~CLk`} zwvHQA&hGW}^mP6GgDr|xRzp=)b$mQxmkkvh+b6jXpDJ^(^{~JUU1#5=ozkJkPBSug z*UIWjc!ulz&nxh>J@cM#I<0qhckP^=Ias2xxWdaS_ju1W5;i^77j$KIeqTO*{CJ5L z_tqti;$dE9X6C;B{s#i*OA4c3ym+a^?kbVO%g09(-BOySPslaqqvuPAFy;%JAPmaT z9yxO4dsmlz$`g2!bgpm_@{3L0u%l6^ld$N~GCN#&JRZTz%j>#4rQkeXljC)~4-4=5 zr;Rhsb#+cXK0e-6j0qkX25^@yw5m!%JBJQl!cKGY96kL77M8iW&ZJXRRDwc6u-w_X zx#5l?N!jS_jh9Vm<%N`k7S`bJX~aDnH4fTrjg+bS$43ySPM!KQVB~ULTKe?O8;S3$ ztCNiR?ntCq`}w_zIh0*Y7$x%_1i{6s3XYO5X&NnI-^WWeNqhy{Mwn@t> zD~pHC3s2bw8-*;akx9zuWm%cXjl@%zFJIn+ePwBDOBUVI*hnQy+F=I9#xtVm?bEQi zM?4=>Z{`&h#pq;fXzA)IXlhak3JM+#xs~t1aO!wsQc|O>&`K76Ac`~9=Lwv+XJKL6 z+ugxkt*vBEPEJzmeL9O{)d?`^LCXfq~Mng@=dl^O@GP z!Yk%I843qGE=Nm!g@5F9b#+rGBK>L$IW>NajO3nGIa!nf=Ov7a^+RcCTw&09o)}Y< zu3Iko@et!$zqpYR!~AzA-@&&M%z_fLQ~koF{WpHOSAP8X(cInLUH7pm7u^)>zqQRx zTFS7T+}x?wn9EG!?zeXnd$TpC*Zt<_=Z7QFRa}?*`}>(By9z6_ewt>gAjMw@XVelp8<%NvdP}^%w3nRbfrbT3@ z1-*qb`9krb7QH2D0t|S+_4U!gmWR>(7#pk2ZNYk1i@VQ|g=hRZ*d+)`OV`>v_v5{% zSGUF7rjBZ4(-sdWN28x{xxu2qJHdG=cOLtxn0s%*Z^u(YU5tr?i859ryOfaU=C}4+ z>ysXjVrEQuIQgjaX6_r^>^i7^P;(YB<}om}xl7&l+*0kK4Bn4! 
zxm@I=-Kk$_dhA!s9hS*ZdQ!r_};yHpSHzg+n)B$ zH%+>l3$A{zznG zBz!D2+0hF3S#qcXBa!;*>YXORXulNEL^V$&vUgzM!`b}!!#P(m91A~QHkcbA$&|8t zA3(8tqq)c>%oe=2+yW;yba~k=qv!WqY|gUnjYPfXOpWYDa{+o45#9(K$N(#T)Z%6 z3mYjYD4_pLE&Bjsfu0O)8-~8U?VC^SD)?pi(}pkC#+3jb<*A7bbAeW9mQbC{N@LpE zbAvN`KCV;iA5SzCH~HTGF5`2Kn-4Ud)PNP`zxBmu>f{Ip(`H?4dE)}?l?L;dusbz6MduAD!0tPK=NLLk8(&h}A*@#_nlXrgprU?4m!C5zPa-uY2&g7$O#l&cujutL4sPfMQ@ z)Hq<=4!QB=x#{VT3xoG@@W}_8!NdfI!GbfVj+YWN)ztpr%!qG`ZMtuWi*uvQ9P>g$ zkIC%)k%tN7ulaKr_V-8-Ej^#Hxwm)xhYuW3QGIvjvx^LP_vST&4QaDiS62i6G@MA5 z_K%0zt}!L#4s!QPoK@aSdaoCg>;EToCpJ=9UDy+4hHx;a(4xhl_O zn>Dl(xGqF0PA!XxI*(oiOj&BE%n@sh4l+q9Ff=x{1h_oL%q|+PvrgGM=}Lk(bPALF z!P^wEv9SR#mJz}Nfq;$GlzCuo{^%}ONoKH(mQ3GN-zv77RNw(BpP*uC!meHy77o?O zo+iB#PK>9Qm)qW!8v`Tb{Nm!w{^o>@we_B$N<9bH_qv$MmXX{{8e?W(#9ES-wQ5-DEj?_U!pn$DOvuEsB0E`;^$_#OjjTUfI~uCA^QwcBI1{cO-a&dj0LVBs!$5I38ZU^+F^77O1huBcGD zk;o1#1E5>X7wvy|G01?jF3usdpP;X=4;8y4B}MPRm@gqSH&^9V-H;Z~v=AM6{?!V> z^QWLqI8m(vyj)mV_-w#yz#A|5EoaAHF!2ZyYn z>1CVZ`%bcMnLR$;ElfUjY@9*!wQR9eWk-iEHIlaw)3hMDl(3E$ziR73nS&`R* zvK92~4U9|XAP%ZzHgKK^by_*P=QW%MwnDkgls|iV*iZno-u2GI)W8}8c!ecUVwV$Q zXa;g9#4!7ki>=*woR;|WCk@mNME$IL+q{XKV+g>~)WD7V89iLbLmorDfMy6?0p3FS zK<;%Be1MNs*Z1!)HVnwzh;VR$+bOX>X|`bdTlfjg{c+iK zK#Q?$@1@VljmqZrs{sD=UV>fOLfE8@Cb1f^6b(4#ZemQn&N@Ih6wNjjj^ARBRfduMWzlT?TNzx(4jRq@Jg+#=RS5Ryg>u$e^SpG&d&) z1v~?2_{Y4w%Pi5cO6)&y>qGdBC(hMFHTZCcQtEs0kK^O7aRvdWXJUdX{q`=`)0XM< z(o!$Z5ss!PE$j@xNmWy&2NqUXYID?Mb8oj#(t6!3T{~~zqml{uAwZ11J*h;s;|U$1 zjgzD!c&t35V)34F2hES4UuK(RWn{V%1#R8{R?Ewa$h;H%s?!>o;Uu^C2Ab5tMmgGT zWAXln4kK5XmVWr?RpoET$j4W+aYRwFKw|Ma-9Eq1i zyxLN2F`*5XV!5M5hwt8A#co3rR1rA;zOF7+jU)5e9hoMJ>(H_u-AgIB%n05<^ST7BMgbq7N;=HFsj)X~=-wDK@CR$BUdE&9>X77k3U5RZIkdpe& z=u*=FkCK>{MtZ7G&v$iSAa4WgC@A0qU4~G%%Xcm!G~@);x$n70<0P)6pQFP#iYT)$ z1{hBeQtFI@$$R6&cKRc4!PdB*sP|L`%89qc0x6nD>d0Ji*>7I zZrpd@=T<|i??Jf%cJ$-N4+N}ou}BFW*-JjPiGbBkr|q$Ym%nQ%Bgcatk9Fo8DIH;rk8Ytn9-^kM9;3zqf=ypvA82T+ zdiXSrY@ik+)MXvBS9g8%@Rr7TXrk#utpZtOLUi}Obm6ond}i@*jjn%gOLY5=wp-!K 
z{nu47aY%!Ou~BDIk20z4v1O-0kt7q?yWttXfcyRaZ4hhh9Wm?(Ep{5fF+(IweT}9p zT`oS#A?NpmIL{qj_sZn5-Z>5Q#OCX*-g%*UPd|PBWRUwLByvw%0liUY#BAYFEuU&(q&|rR-aUZ7IM0gJ=35uS-zyB~xSk20j1S+0szu99@ z`+LU6HFS0R3yjIu{PuI1_xqyifm*;ia)rxzd94?)AH?Y{*e`ZpEL6_2 z_&yNo`n5Hoi*5ncV^I#32ab_dBMpR2hivPw?k5JF%XL5mF)BPFxMZGZpW zsJbP*?m_~;+41=7wI>Z1iws`eyBgYTF$rk2M{ik)$psxJ>W!?%CE&UTfCT-S3}gL? z!F2ri@lu32WBl_@fK00I+yzh+y=T@VIF=h%6mH?0KlIvxN(<=5QzAcwr(o%NE#@*! zsGPLwt(0fOzs2t+cXX=^w=+Bra7l3}HMFy{d+^}Ftvmu8T4)|liEqZ7CyE3moTI1z zi$=FumQX&~sR#vz6(+P;V z#6m5k;U=tk)I@V!)oxCJvx^IMZN8Tzr{vWwxmVgcIDoE(Ev1Dr_6=FL%8Ykor|HQp z*<2c{#-q$q28N1ls3!K8kXm%dV%tc4iv*bAxI$ru>GA<4Pm*eK?va;cOkDk`=!*08 z);iX09PI7weSq4|FD-$7;1C+k5qlEq69)gGpuoAs6m3gGY|ovJL$UtKjF$Q8Tn^yY zX9~qxqVxXh)vF~jkF>Lw3V#d@RV;)+IVqhKPU~)gf>gV(`xDny80Tj4T(7L#R*2Nr zK_qLl)K=ZW1Y!$&=RXz{e4FP!DFE0?8?6NZ&+y!*+-LsAE=xmq!ye@OE+5L@L5qE#P zjo12%9Wfe|z>OOjPO&L!9LSk2Gieorz)z{yfBgI@Cz6zvm31tyziQ6c!rw`_V0K4` zX^DF|&%>T{ny~|V$HRMUo6H41U1&G+O~0y-+`ZX$!g+V&651rk3*k)M1?d;+{>iY$ zk)qM{^>vIXK6n?Gn@8Da=~6sgl2wzn7Bo?q5{}D%iPN0KtjtWdd+mW-4@EGcQN8by z4EMsBO_gis#wfRqX)2O{3v7i2Z=GNXT+@P51c>u!NCR{5n4^fQ6bo1k)TrCfn=O3D z#6G=r(l@4;UH(B?-sw+$nG6y zT`#H-tRCx1mG&>Wg8VE+P94z1D@V;ajy!A2A`KLPEAWeio(pDBSJ^-56*w zBkog$1qE|k%hOCU0dE{dBKqyKK76p|9Pz%hVB7;Dm+yMN{-O3S5^K(Vxo+!41|Rqm zF|Y-REBwU6qnKTt0_1HFU26V(r5KrRZdT|@lclOh(Q?|_*$od2>TI4B3)m9)Y^G0rBDtL1XValwO@H`=~_Jq!g} zK;R~bNFc@Es4+U+SHYJ(#OJj=#-c{1MFS)-KR+K}Fi4bd-oCwbE@HO&zT9&!r78L; znK+yjQ3deK-n8t&WwY&eeSmh~zkj!7v_nl@KD(c#{%j|7==iJ*(}JrQ>CoGk(olvy zo=sVUjtSJc-u+5l<7BIiU??@borRI(onxNde|)6CIx+T_5*BW*Ce%^Xdvr@@pFJ8r zb$4mmI2Q~pH&#m}&r(btA*A6VxGtP28mqrTsI<+@qMt%~s3jfNvZ1Q#`ehbKJk1d^o7JLU#Wn0Q4E|u8|j|pq>6b+DCl$@Nd!CKY3 zmj+*?Hg|M9g(w`1=$LJX`QYO5Roz>l{sqeevT0*E~-gM8QBwsSb|BNkfg3|_2-^$bTHHco|gr#VJ4p{Oq2uRM+ zBrXPbx6wDy%cqxZiLCz&v3bw8iHR3LsSRtkXfPL;DTOa_SCW>bR$BG`_TT`uGu%q|?yO zLALljdI1_TxKW5!HdeXUT;-Tf7_|}?-B(*7*ceu+XoT{eGNIW(`qL53a_Z`l8(RbU zMm4Kll8At5#t8rb%D+%gIFX7*z<|E0^4H!LI8P#4ZI(0AyaGIgtyurdD 
ze>8i6s{S}~=}4<}@kl_By83BNspX^o8;g^B+pBvAn`kTy%GWua8p$D_vH8uS=&qTW zCfFX<#fkr+;=D*rWdYB2b*$zKC!7Lq0&n3(+n*wP@a9m;&3&0a zo0?Gvo?~kYBvIfw%R$b z%ep4!JuQk1Rz|hi;@ez_`@Qp?BPC|?@*xAm(SQKcWnS$e)0({Ou2wV^25sSRxY9z{ zc%XT;$MGOY)e9(1B{i9roTmgcAd{^C8K{`v)hU2HKz{Gtdpe|eY}Za3d(hVc#v93& zx}4B435G#VPR!|`+qYiod@s`k4Y^x-GdX^1kWoEm(+Alm6+F*9)Ku8DpZ~gJ7BspS z(a}0OI`Xjm-qq5Y*XX7wVDGW~D_AiqSE>FAUXF!CuYs#)3qAi#Ir?sN$X0Wlb0 z!d5_CfN0VCHZD~=p0`Z|@9O340FW+ab{7SmR3`w|@SZ#JBmB`*>Sp!AK?fdWU-#D5 zmKgY_wnF3=lRC)UqQf~<->Pwdyin2tgctTd$cs?A5WvYnAzc|()B5m!#n%Q_4s1MV zzf5ATh!l;m#>tnGKTR&e>gU_W21IMa<}U#)5N zv_5gdq6nHER3{q{W#QRA={c|%aks}AUqn6w88AOzeV1r1pd^t39@i>p>peeyT+H?* zFH%-O6P#34DJFIX%6&G5ZNP&Cq)Mi3zhUVTvGP-I>zi?3J{(WUGItz1i#G@l8C}DQtAZX?4o!e>~>sb;we`*Nt zO%j;_!}Z|Sp@F$qj^;Mju-_j`!bslDD4`3S(#P&aDrSlly4*;t@WPz~NRI%+R&|QL zT8{5kwY&+|Okcl6o=W;CZ}fq}r86K9Jer-Y1v78O#Xb7Hpix!0%yJ$q8b_?Me@#V2 z>mAm>qxB@e6(GgT;fvni+b8d$ipKyxgE#h*MN?DL1MFTf`8B22>7Xh?18eb_ID(r0 zsjH0&wsM7yHtJknhDBbauD36>Zw#hsm7|l5X#TSBd_fG4#u{3gW-u$yk``T66b+~tRt?B6N9-gNM=-^~w*9L{1-VVkOc{v1P=pvq2<(VI zi^bjc#~vlBJyT*o%u6_Q5f$uV8HE;#bDSE4@^x?k(gM5>I}O|UQ$TQlZ+<8)j)j>I z5Y>H6R(W$St*Sk{tV|DsnV5A4=iveq6Lf$gmkAjumS{(O?J*imR+c3#QN_L)-8gwM zsq==I*mKju1m=ULvR4rgCqY62j|9=t;zdPCxzZwg0PMLjg1=75$|}pdJ$vX))#WU% zYgRE^lM#}SL0<+V3Mw_o1aRyjc2a8^4FVx=TUpEg@;6|6X7)gA=P8)~KyzUNp`B31z zCD|^YWwXls)#@go5hV|Ty@L25(E-ZWuU{8o&yolhq&j9Qvj7qu^PB|njEYI@(cgSe z@91bU!TRhm*oivcC{{iFJ)?X*FjB$EhjR^o0W;CHE{O{|g1ybe^m1z@bH-wv)s$J= z-|iRPJ8$XiOqtoEB*f5(EWRBBo#A%QE7IoLKuy>RfsO_7hs@$)T7&Ulm%)?(@d~R{ ziiUKR_T3GXSwFZV^S#-nEr3DpavkNRKc3h*7a*cZC&2xLCQC!HSS5n}pgX7kAlFsw z#RP$qi4p=ykmP{FIt?{=NJXplJ(m8p&(h~H!r>K`2A zD@JR2_wTnIp^|#HkDegd1O=sn9t&D0>??Tsj&Z0#GsuMJn43c zj@xNC4Ys|g2`C$ohynGj5*(mO*EbLa0P~|;mRM7^t)8RG5es{iq()fV*Z|P`1(n_k zDEYSY+o7kxv~DC?y1Kp;wCVaWIC$a_M2DcU=EZxCQ&ipz4;VsKn2I;?x1mc$g_rMS zYRzG>^WdTZDYk$Y-Mn~{orV-%0`(n^BMs_x^Y*~;kXwx7X@B9igVEl(se$|++t(n( zB7kzY2iePM(Lp|^!gca6EiElrv9H=J&tJGO`8AXR0pe^%iIOorXQ?$W)FGSJMMeCO 
z2`v#Ogf-yLA#8UVcB(l7!d~tRecVu(U250QLH`3k4TfHR4XGBYAw4b#DyN9ZvtdWR z-}nIfbLX}o33a@lR#WPhk&#iV(QC8m>FH!~k7qESluTkIZU&1B<5&SF!vjB3daY%- z-8YE)Db4n0TG9+ls^?#bP$Y>ujv|h zDndrV9Ci-05CF4!dBQ}L>_2r6S`mNk8V$H$O}O=p-g%$x*#y!C$d=vb21P_c>n`1~ zgboD-h2|&`sR{rpl`;RVPAs#=R8_* z3$g`})qyMr92ex8m#EsFy^Ci495e{2KlMlQ4ub?KC@ZT87#@OD5b1%Tz>%zewdKO< zH`0kH{p;@GiP*WhlDA>5Vrqar0h#$0ZsB{r1yTy8JEv0+@V& znpb6ViTY0!7UF@R!kP@zIb%frfKmh*F1d7Sa1N`3@C^2GRZuJG$nC0s1BZhm4tAbz zWv@u08XOSd4d8}A0@Kb36KPKbFb#rucm^ayO9z+mc(5Sx%lNW~DK6J@K7M=$9uACH zm&gLk3f?_5#U;Cv1#m$ZnDR5Y^5%3+_~;==2T&C-d&h=z&RtnTs5R%*j%oN#-&4Zo zE-BusnY~;fzCtbyU z%zGN1`GsdqPN{mt4^?jR>oHOt^v1s}tA6*^00E%@pVdxL{ryqzd^7%Yf6W1P0KDb` zwhA9L{+b?86>~m)8vRMAPOHA`6;)PVuD{C+KiC*XwVFZ*I03RS6cbE-I>q{`sco5L zLH|T$e}=A?>|@x^LE9Z>?%+@CvTZe~(% zGPa8Tix16vj_b5|Nlv*~@g^+fzS-4MO53z3GI0y!C#>tO-Vv|Z-$~ZU29*x%Nhfli z*tSn^2S!iC zIqrYz^yxDZ&A)&D{sj>(bLeuw!bwT6ODlNy7RiG+Vl5+N4?&k^i9P|My>z}&*k2}; zZ$6k=ShISfL|q89Z3viv8Uaz~lJsP}1!(8a&L4`3+;lBq%|`yzL64lzPBUHKcdvHP zj?jy3o5|hhfJrDWE+#dB=oX}tcrFIdSN)(npKGn#Kb`A!g&s0()kkWi{csWZzFH}&9b%d5_Qq*Ls=_JGsOARYi3>bn2jZmE%Ykd?$K&aZkxIVlLN1RF8 z?=|FPNbLYD(~@hR$S0z4k^_ z)mxZ$y9~P8rfJF5fowQ_iE3c+Lnr;~iOK`z105v&KNVsOJ_;2)OYuyO?f8ikfW-T* zZUW(ixYWI?Sz~S@qM|OlV~*GJK7AtPVu7Q9&rfzdWQ0b!^t?^AtT7VzDQ<- z;kO?X6KMO=BfKq0K-8caki4?t;hgpF1VtKjYeGu6!|(^B&8JT(I;#T(8vs3?#(gRKiF z4idiJVn=v2sTOBc$cm&cdCjz*Aw^e7IbY~2C0XWwniaW#X(vnhz9JOHYndFce8rmsF?t1(9Mf@)lchg~{@KSnl)dhjX~d^$U!F_!WcX z2gDBnJ=DtGody~zxTcUZx7rUvOk&1RsPP05_+u>!p?+wx<&SuE4Kk{p+$khhJw%!9Z|bln@Bl}L%rYE9 zQXK2x;6Qhm0a9zgXGjUh{QO7&ki6$Q;!Wi@MChxXIKiH~i*8!$0`p=HLd~;(CL8PB zoee5ir!5%&O1vq=$xGaP8W)>VvAJRD(K< z-?X+&IFb{l@K2wP=5|RWnzKJOsMlf^027Q@~eF*C_X>kj`${)$ zSYrbaeRyVT(1|f9Uby?Px%+rl8o|CyPY>8wn;lTG(o{Ds)a1VmSq`&VZ&761;evqB zj0!X_nI!I~)EQ0iGlJ_<)&@(b;Ha-o5cenk=@9&fH{HV%she-kZK>*z6hKbrIL2C2 zbnYwJzn~wO0^ka0oO*e>B3s<%JPA_DEA(0BRu9sWzv%m|q^kGTP6Oa6fUGyYsn6rP zT(|iWdgFJTymt*^SQ%5IIBpkd-zi%1+z8{+@C9Whl?a^(M9jmpwJ+EiBJGWKx;i`0 zP==ihl{*%*1xN=>FtAYn1~S{mPpAE34v_7DvjoP>m&i#uPPZCyiIk{+q0b;RD<~3Q 
zzdmX5dOV_HjFl!a1h^sqTKmy=5OH@DF)C}w;+xkYehd?vsjf0hc|vL^o}S=1!&ZeT zz-*~W=uJ3QlwrUs;DW_tK;_1K`uCCM6md<nO!{SC-vGJC!cst1vA*ii%`NH|c3;5^&~0STaCrLlAWQoJ19WeDy-cm(h5 zVAR_FQEC3i?8>N@5#$CT4QRmotYx|ZJWH==cdN&5c39}zciH6MJ6ILphbshtT!Gw8 zEmw%h>5vhvEv})at93M~PKU~o=!+W1l>&-Uy(tL-N*3Zr5zUWg10!aSa6xKoF}a2_ z$cu|HK1@C9k|A6=8WM+PfE?G;A?YsyI)85=`4%pq^_P8^&=b)aT(I0f=Y`d|-@dzuZ@|Bh1f9WR%e?!{f z5qm+ob@_CaEiJH05@!OZmV{EkmnkhR1tmB&_`p{}QgU^y=?N)_3gH=q&upAN?Cj_N z2S(^lx8VdJ(Fg2RK)q7?Yot3QAOKqGS029XRx4c1JgAVc1%d`ZK1ijkLY=rSCZ=?e z`iGwy;Ax2Iv(v<+UNNgNOW-q^8m;tzbabl!Qe}XsYJkT7nG({pKVE1-5-6Ia`y;1MKPWYX2*$Q^{LrJO^|=|If(X9lJZ%5w=H|mvLsCq>sD%aYSdaua z-gDCsxrZKG2MHihe4*i53Ddplm<2))5DzjF-_3|S`T$ws{=vJ-%KJ3Ihu~Y#vf)WI z|0z#UBY*byD_n{p9jX7u1XpV0wI{M6zy#hDI{ zFzgLD@{ow~1*JmFcZJ=wkUmAD1%R9yi2Jhp^P0dcxWb=ga8(!=d>N)R7{diaU|MwC zCj^qQK*V5Q-o6wQAgUSQ40aQEA26Sv4f6pPdi?oy3w9A?hv5lf6cv8!SBdLp#OC_? zoA<}ratOV!WGa8tQ}A6|P_Q%V4zvHq$SO}mfD#m#{I{%-T4#Z{JzQ1^g+dBo0xoKi z;^YvMgf#-Iz)tc=JP1pq_z-TL6vG>1S{C=3v4`=1E*HftsR$Y=OLQ3YEmBbT_is>b zNUkSv2+~ctVc*OpW`e7j07Jx5P?n>Tv5R`$RF$pFEkOB@P?ty<=t9-Sx{7`O@q-%d zP6+pC>*|)OUhCusK~N+6;Y3|v&CWvJNLVgh1xQUzl`7X5?V4V8gXz4Ussdta@Q-ymV_JY;j+Sqz8pHSe{V;xPSR7)M}AW$ zU|ryH6>WHXrOnA!JHunV&^k1-Au*IUfQJAg$P*Ab&HD1?Mt44)g#&v5g;CGZ!Bv01OS6EwX#>Z!qg?Nx%=v_pl%2b1bWI$b-K~gR;#gp477a zhvIW}lGHd#b-8CnbgzVi2lW8l7m_L|n*_J#fa;Pi42mQnE4<`J8B1-26V(ddPN=fe z05~Qc3MrQEhZLbh5-8kPJ@7k9hi3}UX+o;&{h~-1b@RXc{{Mzwt->U4xM;FedXkxQ)t+>?RgK>~Cj+k)N>@)c}GFh~nDn-}Xr2Zg>10rfi=OYYdVmueit=wMm61x8A{lB~ytuTVIy zlu^8ZkHJKeJ_haHlF2CQ{3&+``SU9`;N&H!c7`F1?nk@Ze=dohUSqD&rXMg#L%8U1WbXKoacl)ztWi zuUI{nUW!@YQ`YGtTv6&Av=H`&cLVtU*KOqGUD%E5n1x&?9Fl9-R?k+;Px(1`OBO_k z{Jj}#fqOq{^8fU*Z_^Uv!S(3iI@W#qU>0mkBTgofRW@kX!)x!?*qF1Y-AhtFJL6{Z z^quVB?^Ts%j!CaH_Xr2(PW;PJ(#>A}=@OpNssiC(Ig0pHzE-lj`R!wZiqutPmSs0% z*-T|CH*my_Jt_l#X{NcB?BJ$KVYA>!q|6@Mu6M>`IY(NQm)6y;{oiZH1JkcHHkNk;_qSjYFjmYR)~v93+3Qm3c2Soigg+P^TF+INoMc96EFZ z-iGd(X?Kb-Kc)Vjc+N5W-KF(d2Q*s5axi%Ott_Wm_vw7QE~m7l*uq)U>PEs&h7*s| 
zpJBsQY^kw~tdmvjmguSH-A8_<3@%NwVksY2_XvABxn4sbj6|%rQ5GaIo6v3V(YIF2(2X^TWn@6GW%OsZ)XH4`)}zH*X)Rg6n4}BwE+#JlSCc=x8 z*x|*)ekkG$t`a$*{*o|yWx0?(uN6;3cZe=*_d=d$ zT!x3ci>2}(_@p7enTZ{|rnvu=deE78(01at@qUu8xq7R~!{3zX^&bauX_0}8vYxww zJLj3k7jcEFtJs6s^3~6N>3cM?e?~ROA57yWl}!o5w&J}4E=25G9yY?>OoYVmK-@8W za9v=_LSJm*<~vjW!Cl`2JkiKTJf0fK37_Asn5VO3?(?z^gZ8#1Iu!+GH`S)bO8uzp z*%Vpc!3N#NI-T9`CSo{(Cn$M3ny9*$V^2GW>Y2V77)dl8oAZy8bBt@M;*#49QtI|g z(stb1+y8VjOZeFZV~nIZ@mbnq598`y*ZO!fwCiiTsS~NYvbfQcDKi-U(FeF!%=7(D z>H6GFwJ#0m6>P^|ua&{?1vu>W(og3LzCc^A*$So%_wVSf%MtqA46y5(bzMKO zy%vnb?LU+$gcO}R6w!21snD6CR)x(wqN%F2I{(x*VLH^MFkk$BrCWcstWPRtH-{6w z|CJe;V_X^Xt4f&Pu;oF@AUY5?i449gfO!Zv5MQOhQ0ibP&M^k`$IL0@4{(Doq|nR1 z`}WidCPc~ zRmd^h+DK)Q;wYbD# zN}A@}?E8!f>eY0gDcgxi-91@(jre0JCjG$w3)8(y$;Bx5mGyLWeaE^D4d;|dy?9ZH zZ2{IRX>zvg1EE~PRsk*O)6!p^7W>C^3bmiB!+gmF+*D}VTn P6$oVojoSrs=1=|?qxR4n literal 0 HcmV?d00001 From be0c09f03f6159fce3d6c0a2eb60d03430e04610 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:13:50 -0700 Subject: [PATCH 39/48] Update README.md --- Project2-Character-Recognition/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 96f21ee..1728b29 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -29,6 +29,6 @@ Notice ```numSamples```. This allows you to set the batchSize of the Neural Netw ## Tragic Historical Significance of the XOR Problem Neural Networks are not new.In 1958, [Frank Rosenblatt](https://en.wikipedia.org/wiki/Frank_Rosenblatt) proposed a hypothetical model of a brain's nervous system and coined it the *perceptron*. Essentially, this model fit a line to a dataset. However, as seen below, you can't fit a line to an XOR function. 
-![](img/goodperceptron.PNG) +![](img/goodperceptron.png) The perceptron got a ton of hype in the 60's, but two authors published a [book](https://mitpress.mit.edu/books/perceptrons) on emphasizing why perceptron's are terrible, because they can't fit the XOR function. This single handedly resulted in the first of three AI Winters. From 486aaa5cb48a29255ccb72aef70b834fab2f6ab0 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:19:56 -0700 Subject: [PATCH 40/48] Update README.md --- Project2-Character-Recognition/README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 1728b29..351ae16 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -15,20 +15,24 @@ ________________________________________________________________________________ ## Additional Implementation Features ### Variable MLP Builder & Batched Updates + Define any MLP very easily as follows: ```C++ - //Network Structure +//Network Structure int numSamples = 1; int inputDim = 2; int numLayers = 1; int hiddenDim[1] = {5}; int outputDim = 2; ``` -Notice ```numSamples```. This allows you to set the batchSize of the Neural Network to perform Batched Gradient Descent, as opposed to stochastic gradient descent which is the base implementation. +Notice ```numSamples```. This allows you to set the batchSize of the Neural Network to perform Batched Gradient Descent, as opposed to stochastic gradient descent which is the base implementation. This required that I implement an ```AffineLayer``` class and construct matrices out of these, and handle backpropagation for variables batches. +### Variable Image Sizes + +This is a consequence of the previous feature, as I can accept arbitrarily sized inputs and outputs via the ```inputDim``` and ```outputDim``` variables. 
## Tragic Historical Significance of the XOR Problem -Neural Networks are not new.In 1958, [Frank Rosenblatt](https://en.wikipedia.org/wiki/Frank_Rosenblatt) proposed a hypothetical model of a brain's nervous system and coined it the *perceptron*. Essentially, this model fit a line to a dataset. However, as seen below, you can't fit a line to an XOR function. +Neural Networks are not new. In 1958, [Frank Rosenblatt](https://en.wikipedia.org/wiki/Frank_Rosenblatt) proposed a hypothetical model of a brain's nervous system and coined it the *perceptron*. Essentially, this model fit a line to a dataset. However, as seen below, you can't fit a line to an XOR function. ![](img/goodperceptron.png) -The perceptron got a ton of hype in the 60's, but two authors published a [book](https://mitpress.mit.edu/books/perceptrons) on emphasizing why perceptron's are terrible, because they can't fit the XOR function. This single handedly resulted in the first of three AI Winters. +The perceptron got a ton of hype in the 60's, but two authors published a [book](https://mitpress.mit.edu/books/perceptrons) emphasizing why perceptrons are terrible, because they can't fit the XOR function. This book single-handedly resulted in the first of three AI Winters. If it weren't for that book, the students of CIS 565 in 2010 would also be implementing MLPs in CUDA! 
From d39ef9d1fbe7c34a953fe9883ddac70e9d9c3735 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:29:21 -0700 Subject: [PATCH 41/48] Update README.md --- Project2-Character-Recognition/README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 351ae16..fd3c3b7 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -12,6 +12,13 @@ ________________________________________________________________________________ ### XOR Convergence ![](img/chareg.PNG) +## Analysis + +**Background** + +As illustrated in the image above, I could train an XOR MLP via backpropagation. You can visually see backpropagation work by setting the learning rate to 1 and watching the softmax probabilities shift wildly on each training iteration. I trained the network with Binary Cross Entropy Loss, the network strure is illustrated as part of the Addtional Implementation Features section below. + + ## Additional Implementation Features ### Variable MLP Builder & Batched Updates @@ -26,9 +33,9 @@ int hiddenDim[1] = {5}; int outputDim = 2; ``` Notice ```numSamples```. This allows you to set the batchSize of the Neural Network to perform Batched Gradient Descent, as opposed to stochastic gradient descent which is the base implementation. This required that I implement an ```AffineLayer``` class and construct matrices out of these, and handle backpropagation for variables batches. -### Variable Image Sizes +### Variable Input Sizes and biases -This is a consequence of the previous feature, as I can accept arbitrarily sized inputs and outputs via the ```inputDim``` and ```outputDim``` variables. +This is a consequence of the previous feature, as I can accept arbitrarily sized inputs and outputs via the ```inputDim``` and ```outputDim``` variables. I also had the option of including biases. 
## Tragic Historical Significance of the XOR Problem Neural Networks are not new. In 1958, [Frank Rosenblatt](https://en.wikipedia.org/wiki/Frank_Rosenblatt) proposed a hypothetical model of a brain's nervous system and coined it the *perceptron*. Essentially, this model fit a line to a dataset. However, as seen below, you can't fit a line to an XOR function. From 308ca6c898a2416a26d4c0110d41441e12f10dea Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:31:29 -0700 Subject: [PATCH 42/48] Update README.md --- Project2-Character-Recognition/README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index fd3c3b7..6bfa7e5 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -14,11 +14,9 @@ ________________________________________________________________________________ ## Analysis -**Background** - -As illustrated in the image above, I could train an XOR MLP via backpropagation. You can visually see backpropagation work by setting the learning rate to 1 and watching the softmax probabilities shift wildly on each training iteration. I trained the network with Binary Cross Entropy Loss, the network strure is illustrated as part of the Addtional Implementation Features section below. - +**Background** : As illustrated in the image above, I could train an XOR MLP via backpropagation. You can visually see backpropagation work by setting the learning rate to 1 and watching the softmax probabilities shift wildly on each training iteration. I trained the network with Binary Cross Entropy Loss, the network strure is illustrated as part of the Addtional Implementation Features section below. 
+**Loss**: The losses would vary greatly, but my best loss on the XOR problem was **0.005005** ## Additional Implementation Features ### Variable MLP Builder & Batched Updates From 6e08b5f77c585f7a3807d625efcd97c59de2092f Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:32:05 -0700 Subject: [PATCH 43/48] Update README.md --- Project2-Character-Recognition/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project2-Character-Recognition/README.md b/Project2-Character-Recognition/README.md index 6bfa7e5..d4eebdb 100644 --- a/Project2-Character-Recognition/README.md +++ b/Project2-Character-Recognition/README.md @@ -16,7 +16,7 @@ ________________________________________________________________________________ **Background** : As illustrated in the image above, I could train an XOR MLP via backpropagation. You can visually see backpropagation work by setting the learning rate to 1 and watching the softmax probabilities shift wildly on each training iteration. I trained the network with Binary Cross Entropy Loss, the network strure is illustrated as part of the Addtional Implementation Features section below. 
-**Loss**: The losses would vary greatly, but my best loss on the XOR problem was **0.005005** +**Loss**: The losses would vary on each example due to random initialization, but my best loss on the XOR problem was **0.005005** ## Additional Implementation Features ### Variable MLP Builder & Batched Updates From 2917ddf2786ea92f40e55e36f52e9277c6ce7c87 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:33:47 -0700 Subject: [PATCH 44/48] Update README.md --- README.md | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3a0b2fe..5815fed 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,12 @@ CUDA Number Algorithms ====================== -**University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** +**University of Pennsylvania, CIS 565: GPU Programming and Architecture** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +Dhruv Karthik: [LinkedIn](https://www.linkedin.com/in/dhruv_karthik/) -### (TODO: Your README) +Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GB, GTX 2070 - Compute Capability 7.5 -Link to the readmes of the other two subprojects. - -Add anything else you think is relevant up to this point. -(Remember, this is public, so don't put anything here that you don't want to share with the world.) 
+* Stream Compaction +* Character Recognition From 3e935a86e8b6910b4d5b2078640f67d36bc7ee2b Mon Sep 17 00:00:00 2001 From: botforge Date: Sun, 22 Sep 2019 20:49:44 -0700 Subject: [PATCH 45/48] added last graph --- Project2-Stream-Compaction/img/graph2.png | Bin 0 -> 21237 bytes Project2-Stream-Compaction/src/main.cpp | 2 +- .../stream_compaction/efficient.cu | 2 +- .../stream_compaction/naive.cu | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 Project2-Stream-Compaction/img/graph2.png diff --git a/Project2-Stream-Compaction/img/graph2.png b/Project2-Stream-Compaction/img/graph2.png new file mode 100644 index 0000000000000000000000000000000000000000..52f2c5a8ccd18a1c7d1e6e5b2cb146325d733fc5 GIT binary patch literal 21237 zcmcJ%2{@E(`#-J}sqjQv)KE#3N?EduC2f|hsiZ6kNwye9CR-(*wvlXEhGdz_HYl=- zO7<;`bt=mkGbsD+f8I!X-{<`<|Kt1Xcn%N4eb0Sg_jR40^RrxYUt3d|eJ$@=CMG6! zRh1KGn3$N+OiU~0*08`UH%f|Pn3&GwsGd+dd$YN(jqT=Hr=T%=ZLEIa?NiU2q&8^t zu4-a?AS@hxMPU8<@@lc7Js*^Hn4{T`tupguvECT)Y4s}W9c$LCdEli{u!pf#Rpswg zm1CP!);#UY8JMWLMNd4tPxajEW3Ts7s)oW@&jfZgB?=+9{;F9Mnm%Q($iVC+5-OAZ#=v*(jnLXi*WRQ;(#Vq&!k0&P)y*)r@X zK_TK6rFm{sYtXU#8J%OHY;PI~2aD**`L;pIa~i6uKf_4=l*y4mQnZJ0hVf|2^zg7F zHtzip^Nt&ZmCh8mU}_%4eM;l|_wSu|ITa33q6c1Y%J7)(;P19`N$tQA&CShKuU&g- z-TE%&M6iIIoI#vM*q+8@VH>wx7@zPU2zxOKZnbu5`OlzFS<1`7^u`z6DmznVn->V^_hFND{ltrQUPWJyC?Q3B} zdl5WuwH>%XD%f6=i5q#{#7EnWd|e`qO8oM^|<4~Uyzq1DD-oeVaw2$gh+tD@l_JILiE=Id4o)#lID z>lv3s&P`hk-GW=OyQO!A!q$*Uof|Qp6D$iB&6qh@>`8K>pLZE+bMAU9)qW^n*SGpz z+E5hQ1k+t!g2|Q&(sAw{Wi*-2db4bunc!5MKGcXy8=Ig;hEZRUoUcmRG@CV0wG?Lp zs0#9-#4&y4*R6&udPR7t2H#l@x134z5o_{gcO#D~i%7R1YB;lnJiT|wgE&>OEqSZc zx5Cv!%`Q}eb2%kU!7Xb7RqV?zT1(fAjf)GNttq}P&tf=+SKesowm#{d>x4rRUn>3x zT{BC>MCkD08aYivJ~#K_xva!cMUOn0#)^n|U%Rf-z&$r+Ch5uD2Akc)uk|&*b!oVy zL~mS+o|GENLC;*8Sw!KcR7iiwKvX&XbCd$tJD39&|kGX6cW*cK(701$$nbh7&eA?re zwuhYl+W&}S*W^q~x=ooB<7Vn3T)?NMK^hK|Zm;0b`FEzV){%pMOFrqrKaH*klM0)1 z+_Aq@?A@xVW=eMu4=eT4R;?cfZ&bvo$^+oDh zp}*RaYYz2Tqh_Xus*L)SoQv=o4L-z{9eTH#2-J^l#nj4YRN9fcOTHP;rwnGcW#Jt& 
zN7z}EWt}eO>v`xY85r{Sd!ynNTbt5xldViYI!L+~XxaR1Y-}ev&<&N;6w~5z^da1? z{7Oz|*Rg~7G``A0ywe+6d5Cy7L8I*l4IMh!pE4U?S2k=%!+&he#9(Z;2lnsivB?sD z5p|OPA%|_lQF>`$eZq+IzZWpU!`~RkI+r@h^LEyk7houhRai!F>w2F^X z!)?uBQg)#&!vnq9UvOa^=nQ<-5M_2$s%1iWvh|v8&)~IzOHsw{#WnTC7H%*x#yrAC zUYEV@>pX^IbV6s$esRqoBT6=E8dp=w(Ozpl0QC8{xI9tX)BsNl=E(A+yFI{ zlTtC<&~czg!Fkp@iM3C@_~D%%ogIC}cUHbd(~APvZS2w6I>Xmyo0V-d@(IJdgQ*D5 zAq$1pJjwap177TQRvfz)E_*iVg%@gO^6UG%nR#|?Sx()f$@#OgjeQi4x!g?oUAqd? zYwFl@^WtgN?fIH3Y92>&xUa5p791Iq%~p>mmc8_7!C9y|Us&5>PvQ&aP?KH5bBb6Y zohRZvJ3V1Az}RHIwUrGWk-$Gi`F4{>wr#@~XNeJ+H`phAw;I~0bDCpeQbytvHO?|9 zQ3|7!uGttp8pOicrgX!AB5QamzcFi96xugx$J~(OoN1nM@W-aCM(5z6mLoJVIq+W&JVLFIn=xD!I{EkUp2`; z&1Np-AE$&kn&q|-(OP(UdjGF-`S4&`KcmOWAkxZ8sIpCJ>x<~s^JC2%U1n<~ehW?W3D5ixSy6sIeh!bj*eM2WlO;uWm#H zMkcBw>uBtw4^r3nd!e)tcrl#4(=M5gUAcT7WpSGl?5GK??C7jI;#}iM@1qXCRnC0J z-G@=med2o5ezZar?#0_Qn*Jtv2b+5+DEMy$XpEt2a6~2cjl|zF>(CK7MlfO94G>sp zrJ3tR{s9Yj`Y}{HVXIAdd5(>^)IF4aH>lK*l;k^Ih&TFhjF`|Xa}A%&a{iuH`W8=9sUBr`uqqh>A5kh?)?q*nvv=7BXW0}1b@rYSV{p<&5r z3Q<>b7~5w5#rf6P0L4zTAlIQw%@1m7!0vqK!o=wJ4fStxHeeIhZP<8zR4B7Wv1;<> z5L)r`&(HX5-{hD=yi~eSk{8M@iBl_Gsed#zzUpDNZx!KimA_4#6u+JOSmu;efYa`C zMNv*QckUcKsuOVdgFd;YFN0+R`+L0dQTeT94`m}(tRaz1bEB`=tYSz>G^pm|#GcDr zX(lH7px7NIHT@zCXeHmQ=sm$5IN?eHx0_H!a}<;pd8-FcM5v4l18TjvYQjm>uoDxe z>$vK=i{5AycKAqfSxPU*{XDB=H>rJF{t!G&qo+;FAV%j$ic?X=P}a&8dp z8A2DoZ#Jn$?X5A@zhs^5Pvml=78{Pj~{;M!t<_)qIVuOO<4F zI!CeFJ?iRzWWluZ5JQ-r`f^k+y|&7QbR-D3lGv}^d+Lmox~$4(Nyde37iVThg*b6& z__L=ZkBKtla!r3*2`BAplm%UUe>h>?6)mkz*z~l%$L3$+nf0khdQMTARU4gmCsv)j zA*c5l7hF~^VyMsjf@2)#V&taK$78-#Z5N|w$Z4=sb;y=9ph1)^}SMIw!u! 
z_=V0%p?$g^GiG-op)8D!>AIO@;NGgwKU(FB-!|zi@u6qp+-iHLmfpud*B|xtM!8v( zg-Y2Gb|*03NwXTJtA4{(s^}EqXQnT?Wu!l^YWnH;wegijLzJsUW2>|IY!U}uM)v~BJ;(#@Y&h*fDuGZk6oZ_-jJuQ1e~ znQ6KN;1wgJj>KOOLUpl+*P%kDRt$G{9Jt0ChW03OH!hHEjD7Cr{Q|!eZr15`$eq%x zAAh4;&TAEmGp*poGMwdx52e55jvDapxtyS+!up7Sb`E{~QpiM^Q;7cMki5ycvdJgO z-`AkkJd7wQb(RMwlkm1*M+jdF+f`GE!8u>(qnnkUbHhOkrmV%=peMc-+Od5Uo zENez#JDYP1#meoxPnv-K!YH&^)|~#)%X9b5=8?}q9WG}pH4kA&Ci3fMx`EB)oge-j zl*c!cS>Kq%7h6A-=$w>zIxc%MOIP-z3UqJD3M_8>v+BOxJd;8+Q8w01QV*8cuqL^dne8uGiau2#cv#b)k5K3QO05pB zXu^80#?n;Q&Y1G8-q|TJ{-?e!|4{b$3eUM%YD$lv1AW?1UXcsPdh8?H>5-QYBKWnr z9D98tK4)4~MVxut&W09NSHjFWWpU{0dETL&m7+^V>zQ^)GJBGA)o4z$ic^dbY@Y8uJo$jYCzi-|1%d$x{`YB?ve(-meKXib9Py_yf z$S;Tq|3K##WQ0G&gnu|34%B{)l8F>VYhB%Rk?3Bz@27dL+what=FGR%rP4g#>gaOR zX3dh!Y1MCmITX98-migcnz*uw)eltu?mW2$(KmkN$yDd_x}CrNyhmHqm_LT4FZ>w@NolHGhBKnLDoqO z{Z3U?Rm|-3U);Q3BIIwyz#Q5m(VF9;+wMMP4CVvvxi0RS%}nw zeoZyW-_LI+3XKgwC@1Y0FW=SjXWxcQs301N-S~QR=wQA$@Kt}Ih?|8S#r8tbAgA`$ ztY=pc%^QCa@|uXAtf{H#&inXqbJ$+vxk1snD{&t3W33pj&6}xdCRF~paUZ#fPJga# z+rmT`JsU#y>c)}t8U&8W8fy7E@O<(8I07pcls2fFU$UI&G43;ySABGe_YrEZ0J_M7 zIuLQL;aR3Z$kM#~B!ZKhkFS3SnooV-?J`&H+@6g`Ph~|@GVF{oUo1es*t3RK!;w2s z31#`hE=y+7%9ba5wlK0aVd$0+2{G($4yE~C+GB_22Ex=4;&0TVl!AO zD_TkFC{68^+5ebV@+pw6lOTf0V-7Qy!LyA~y34|6to=f|urgc*V@>@g?M>iR7%wm| zC{);DNvbK<)mGfp*ujcaj#bylHLsNX_h?Mcp~sqFQaIwD11PSjWkyMoAa6t~JKn z=MGh()8p+0g@nRHoM?V4?DpYFy49(@reETNEt#qbH{~-avY}B?n!Zn5XD6sngM#+h z<&Ff&WEAUubfZ}o3!s}VnlYEsty2siS7d#^s5EoIz?UD9Q2%;vqD-@BbnI)c5C1D! 
z$j$26kF)luCVkT7+acXCdJJ9}hnSG|SjXmoGy%3OU51qjkz`(9{IYInIL++Tqb+`P z;Y%k@cuj}p)cXhgUG3I z$#boVB#c!VOQZp{$$lODVsr46p|RVjVp!W+^tKoI-##u>#-s#mKORb3Q066fkvDnL z&vXjcg$Y`U1eHE^5nr1aX6*VsTSQ(+H#PeB?y2(p%|@HMCoGJ+%t~sxBg-N=MGw3= zPiXtFZo{Hv{704lZ#3UUt$DfP{Kw(T(dA!2qJZi;{>>+0)p!g@e`#Dt?N$y8_GlDx zJm}~27-tq)p+GC<_b?OmY+u!Lx;o4Y^%u6_(}w3#O2#X_P;javc&1#oNv<6X`2Nrh z=HWd(r{TLoj)DSq9QSk46@?t|^q8f3G(7WG<*Jfbn6O%JUxEBa$&L#P*SJj?&g{kp z7g)%#ZbMpWjbnvfI<`C<;S!kGDyM3TTCdgmlx{xc7X1aq{7yT%3qr zlFGzjmyL4V@+;W8anq(f8evBc{J6EfsIspmqibp)U+R4)2v7%o2&v{}q0uUnGjMr9 z0I8uSb^0gp&%wAyRKY%JH-tsxdJ zQr6@hfL!HcKVb$JurCheUJ`I&P|6>kHYt9cJ&@ND-WaXy6cBGhG|I(YT^pX|G87je zqNn-2qr)6@tMq6UHDWETChm0b!IE^F7PB8;-*;o#m5=*y1e%uxsb0DALZ3z1-rl~u z6-6*oUN*heQlP%eva|R0*2lvm>X0}xb>qbtefaRLf!whOIRL?QQr%mtMuV}&B*RNk z{sna7)ct66L~2$tz??(p=MCHsc%gW2-MY0l&+Qg>6|SwYajAllTIO0 zzo>^wl#EU>jt3g49Q9@N$Q(yxekU_!W;|ch{l||bjmKzp$}rf1(Wf3ig+@zdsJp`a z`-l5ivqDc2(`Y9*N#yXHD$?xkcaWP{TDU^hG@U?jK+0xkrYdxJbER*)+n{Xh1sUK} zd@>l+XMW(#BR|&-@F%HvGp!Imi;q1W9O@an^IC!DitaoNwK8PL1KZzf*o&?abTj=V+9Ck0eVwT&yJf z1>9T7+$!1Erjn0lXP|csO&%E-?x`ul<{Ry5nGN3+?-;Ud{&GAB^hV9>-);=mf?4XQ z-JQHuYtU<2N!;z8LaW!eE>_7Mq#HELr8;@nlO0WHP%yr{+Q&!DQEj`(x@CV@i08Qp zg=_JS{<>kJyl!c1|163H{DqZ~SS?fY+rOSTYSnr2)-3r8ySR4g>;FD#mIob~GECCR7sQ>%=KVMP-85VAO{>;AtY@tRh1F$uJ-V3#I5{hFi zEdDc@4(68N5tc{DUvhA8D0c%l<4m0T)=ORlH07`4ECTpKs;hUR$LJrE_H$zbB%ST~&kgHeP=qj7E6u+yd`J@Cns zCk7_U#~+tgRtA|C<5giTm_yNk1{ljA?@YY-R&0gPfdd+M?%s{PlwrH9pZ z=RgqiElQpWf*HN5qu5vK{l~0i>!#H1HU6_nCH_LEt0Lqp+@Q@p6My~y#_QAoSk>Xf zz4c|SM2eJizroPlD_x@6#|JzW8-(hU4AJ)=Jt`l%0M6qA4y14`!vW6=K$je8wI5?i zruA*k6q`zVO|)!jJqZKlxw*gYNsRKV8DMcSbYV;n;X zeD|Rrerny4uED~|XKmYfL&C8~TR+`8WcQvupSljw$;3HUViTdHf>q;mWQ;V z4ps1Aw-ZyESpUj3CjQ-ofI?>`H6m>_ue|f8D|2sW^TBM-bWg|jH9K^dZw(PMf1nX$xEH+PGCw1Jr9w#e-V;&wT5PmTfxGPy|M&ePA5JFlEkl;Z^7tWuv z)|Hw0`1oYMIS9r1R}(4Z*%Md-_NwMkZL}Qj{T-K~?yiY&!kt1q--_B-rcS+I4(A^i zH3Z^+o%rpOKH0K1=Kgwq1%ub;6Zg$bja5ksY9_yl`*u1xJr2xlrv&dBbM&pzgo%s6 zQP32nPmK&VjZ(Mi$CjO52Km|v+wW?t?)p6d3q^LZUVar_c73cXwqa!x-4rNmIXDA* 
z0;JDxTou&I`z0HI5w&e`^+e>BH zDR0l+Lwf2z+v9{)|Jc$Vy7B+E+a4BlV=LqU$XFr*^SJcC?6!e^JQi5QT+aiHRTY}i z!Vg}o%sJN3_Vn4fHH!jPc%F%DJgIZ`G7BH+_&gPeLjESTfMXu9d|n7YfMX3n5Z?Dy#QY-3 z?9-_rLK*#3v-^D(3bW&;Edyi_g5Fp)YQEM>K6u=Fysx{S{2O7 z?|MiJ5kkMyAhju&d*|x}aKjQ|a~bLQPFg=~JbF zA{J9|}|3z?0L zQnG1E6{L`cK#mHHyak$2#hW9s)G9K6R0>hi&SUo}Uk;idN}gx|j3^Ui(qDvxepWX3 zK`Y=+`NnKGW0-baUHyJpf<~2ObC#1;6#Whf7A%kS?fL1C>SRN!`9AIWA_l5rSJ;Rv zA0c!UH02QO=kE)_seE;HG7Z*&`(A|}n$a5!oTV|(oQ@d`=}h3F_V{Mp~>oD8A10UACwPEKb_!@sadi6PFH znA>%v=*=S4uguSrYX)kfg(vkTAvi@!xRg<8AkzoaP*c%@`f;2>q+{;Q(Fk(ICX1iZoZpy6Tp^_g-L+E^{Ac=nuWsG={## zr*HPjlfQ^{)M2?@%c=lsqX;{CQeQgHbAE7k)REm86-;G|*1t8bmC66`8i>*BPbPAv)YIUiz8DO-U9<9*v8^-nu)2;zvT?2O@8~ z@JLp|J0m|m&_8zUPQYH>Qvfc3^ihyFNs6JjVc;|6%l{06a_EFJJy=J!YP;}W0g|s5xv^D15a^8Bx`q|mn zXG^^Nph&m4&q3jc1;Ob|;zh}=64#y>$^Vc z$bgkf?bIhZL!O3*@BeQkFs-7HP+6zfXA>^y;8EzSc?|_ThyW&U zO(rSHVi}+Pki`lYZdv~Dsr|k?xoD*Q%GCYOFh;xl8^$m(dn6^#1sycp2kFi?ea_b| z*G3D2M|MQfV}U4Ljs>DbF-$g+FR83dpMe4EvMxIWQesP0ERh%%t;w4Jy4lrB3!<5% zFJv}#c!9T34}bjlF$Cj0+@q_sO#Lm^D#C{XrI6u;FqfE^Sa~0eUJ%Hdp{O}HPd6L7 z7tFsyb&U#h(-`qYjohD~wsqS{2k{Z0ZeuL=j~dhGz^x0la&g-FpGCE6LD|qrq=FQ< zRUY@o>)yQ*a*8QtMqOR~(VJ^E#~Kp!b-&_ji^DUsx6fCzjpsg;$|_uNIHTony#_6~ z8SAL_EJ8}$u3hdD*=25KtYV`mJIi&LxiE)C^QUX4TZgI??c(0kPFP ztF7?BD!#w4YWo?&@?03P>tUM-TXVyAxt zH(mPhc7b_tgC}C*L(*M89OEV*{83gfP6D(3nBe_I6zaNXX{|WArzzkQDViG9vtfO% zT8LB3Ng_@^SbYy;zOceQOw0C6S1UO@9rL$VGX)E6awL4Cd`l-Xf5}5H1znCoOmOh3C4J=P8bGpS0x0f>UL3urWdJFf;VU$mw1SuP(&H6L$hp z9cj*UkokRAdSuA*7k2J~o0yhmPcum82 zT_S$sxq5Mn(&9BZ5EscI7V;%|+*tSVZ>*Nk=|Kbdar8Fi0V! 
ze)~eS{6B1-^)E?vz~@`?CwP`QG&E-uYEMmcy2p^nGO@C5pb*~$%~Sdu6xWL|;e>z% zd6Kn%1U#55+2lWs2pDDC5zV!^gZi z`E3=9l_hOkFU(Fh=7-KDgQ*L6^$8x~Zxd;59U3?MX|1fRTVH?n0GKEnhnxU_0A}u6 zL?V{hB7L6SjNPY~wBORwvU~2~X3-#UWt#gR`uYa*N?!lL;9jz*aCLKqnt%=(pH($9 zB&oEVss8Nrsdg7CYOC~(9shcr;nMcUz!bQ4Q2RNRdO_J`c#Rf>+q+ebzY4iwe%XSX4w8q;~@(w^fj9I59jm)S;4d!MI_*yqd z-F7oRjM3G3QqIJ!K7G|%;_-(C)AM%i7A&L-1c0X=)@=e`&gwM?PG?@75sYvv)GS!V zw>T@FZ}cWWU-22?w9t`Ap&P*ZK0F7pl6Gpu`rKo!Ko)I`i^1Cgv7i&u8tH98&z=dx zwv~+eUWJ-4oD%~gD{+kWaqEMYf6xpx4{u(yJGIvqX@ch_C@7XPUkblE7aufn3m2IrZfLBwXg*GUOW+YX9{|-QB)J7N6nbiV z5GV-GoFe!O@fLv+-6u<=dEN&-S}?U&#vW$$Y&>G+V^H(~{7$>LhnCNOoYa#0>E#Ob z(&76DHkYrrS<7v3m6cY>xmQ;{V(|kRq`44pO4}$lMe1O<^0A{wdh62+@s~V|7gXu? z@D>R$3=QbX&^SBLY6Y9pZNfleBDW@eQ}LC9*FNVcPUyrOUt$6}FQw61=>?rbWT<4FF%^^o0x09?koeJs)`-)!LzrTkD#`ixqPY zh9**kK7P4GRlcHt9W1K6Rcdx*mrg(B@(pYr_TxWHFe~o(@ZnUzRkpP)D=ihvhT%o>%HKO92ow|g%#Ff=46%vy>*gHxE}W8Ob%oZD2N{@pG-go3Lj}nGEE$Z+N_kP0fwW3YYLARI zC<=iv5uPQFto)1j!@M+Y_2s3X7iiPNH`&ku#E-x3)P{_B9$gT{(k#$W1~UG=bJAR<6n3~16(0HX^jjhlY?gfxuMHFC;9Gws?n=m6z;72wyE*pYu` zV6ea-yRKAEMBVM&Jz(*b1z4l;%2+gGyiNuk%N`toys%(zo(!N&$mY3LYFe&Cd`vA+ zea_vu`SJwuL&^$MA{P?!2y!Ib#{}yc<=jk}2;YJm!Fx$-nkq>CXH z!K4M7aYl+EH&ia(U374yNV=%7u)GGv4sMP=Cc1n`jnG8e2vw8@45W`Dq(p)|V2^XeuTJB5{Y z`7@u=`c0RS6yo1!balJ@wG{losaJ(uSEsMXbTqOb=uZ?LAejyX8@u~K0dI6I5MI{X zY(4Zx12ms=Sr#T)E^q%|TBgA9m%QlzGP47Q!f!WsUTR-V39Vg+R!XZ|nZ78rM_gON zQBM;0vt^yY3KO9n>*k|B3tq3!GPnLH`KJ$VLN_iY<#&FEz!ZPDtgh=zmA+if(qT%L z>;GNE=+a&kFj!1}**&aY1dyVZT##bBVS1w$kk;!{vS5al2J97phnU`%{^7%kOz5wW zJ+Ivg7C@8tLQ+L@fDX8OU*Xd~xU+;^o0g23pWAp>A^rLk4sf;hjioLPQQp#_x~dR0 z3A-gF>%D@Z@JY`Yo+m?yVkD$E!?c*gvi@}-n>4h6T-&$rhh0?y(7rGALEJq^2=&0l zHzYUtWf1Ab{4Ws6rj7{m&^f9=zz8vO)2Ff^U|8hM?hjUS$RUT>X_uBN+x8s&<~NoC zAR79Emk=VLuC~mc6{Cy2{1wUB?{n}L(i{j0`H(uU3TnCE-@JZlk-YyRA-P)yJWieijiWpjJiS{`@Z|qMvbd^0oM+$hNj2Fhx7=;g z4Gam%xya?Bg>7RJA>< zFsz5(&L%=kOV!mir#w>8_!NG->0b;lsfQ5BSiJ$G3-k7QU$*G8WKUN27F z0=z)Fs^ihahvL+JP@v)qmaQf39U#>RAkoJCu$MQy?u8>Rj1N^Kbi$V*tOdvv_4%jO 
zj0cP-#?1ri*I{d;n0e{O5Cu0}(o}C_vZykC`{AgUCPm)eJ!gTko5KcHF|c&m)~)l0 z(deGsn)~C9X5*y=$$dU}voVmEisuXp|5CTqrH=c(9xJ#_etvx}-VB1q4zL^7vOOnL zJzTm*T`#T2FBY7m(s?0bvlr?$sQ{_J^Px06f6ZmxT$~feo{6yMstd%BHEsg8@__f5 zk;z?^9#JKZXj_oP&Ky8Q^ukrR$|vnaDuWz};pXMtw}h8rl2%ubg}Nu#?zKTOvOj*@ z2yR$y;Db1pyQP! zU0b}=xZquH@V5BoZvCB1^jS-KK6IS5M76!@*PU}rb1z55HDpqqW#16^g({1=o^4Xt zeT#v~<2qmj7HA4Crc8k5g9EwzM{tFST#GgGg|NpL(TudTv@WPmPoVAOnd%6av+c-s^VjPB#pETg0ACr&wyI&smd$ToJ>pk1LRR_8 zyX|otDB0bCgK#MBUrS0hxy57tCjOE$z< z=)z~l`rFLOBoljK&-(QGMZr!;!(~px=IMKW(Ko%F0vTF~}Gjch}=P_-6iCi-UV&y(y=#sYu z@=0JY@X{KSBMtK1+g3fJ%=9}uOb*iwKs2=rJaFm!)ynX_F!*9FgJ!g*G~&d>A|Yct zfdk*UBL+^>tuT`ZqGsrv7D4vhzVivf;6I2PXMn3c@9b7CI=e%LoN$fR`xYhf$J-Z2 zbPamc1h8B-J(-hV!8)uV&V-sb)nCSAJGc&Q>^2hkC0)k_dD8c^U>c zDWTid{7*b=F*jQ77kB3xxun$)|6)3B5k{;(z{6TtXg5U|WS=l7xhepCHpX_%Z}#eu zQ^IAP)MTCett(^g*P}<3uM8hs3AFI|e+mhO4BY`qU zV@Htk@RsRU?I3O`#Mff1hPU&}3tj#B{bM>QTGsQBR06sZ0Gi zX_tue@RuXvQOCh4~AJ_ewY^?Wu{`A@BgGAGDi@Kty z(b5BVzIf`CH*6z-$MpIcGw0%o_dA(T?+ZmdfhJNq5kl@N@IgBd`s^$~RAuIr+mT@# z$SA4%ryT`n3wVB?qS)vkLFegu^!exi5p-6{GZ4iPdVWz*{UoA}+UWu(d8*9kav}@c zcir63o+kct^0cl?8w9}s$ftyP#_mynwgUq3rD+^V2AR7+J*aZNjxzBXELMw{=89%r zdSJz*XeBfFh!cmQZVi{NK)p`bwUGg3)(%t^hmqcM-20%^Lvj*f_9Y-3XsArI4Y&&t zwIVB>%!(&3oetXX2bw=ON#fmwGM)cIC1gAOKLn+OyGx*??1d_&`~syK=3jFVZ%T%N z!uQdky>E$oYe~)=$eVJw7dBm3sb}mah;SM(>YMOc!VsvPcV*Exe>|5FGd%U4FVK%sIG^<2Cn*Ctp)dfdt5w&`d~t?eQa!$rpy4z(GPHdEMbr|Gvi|l^&2&h!n^5NSn9N zQFkbjGhHWAWm4u^?D7MRJ!a~+-%lyTM+|`wRrTz~(^SLmjR2BT;<8i6SEB6Al^E z2k9R-Eks)dG{1fOb}$i)&o1B9OGSat3k8{}9vKB(dl^U>HYUoxt&m=AHF&IXx8$^F zu322r-dsc94++Nkc?G0sJ^Q)=NQu8jwwM}7R=m)=Xw{bWHnTaj-2zpg!h7m&?IJfy zoX@r%KXxqK{gd|f>-*;U(PD=xZ1i!)MX?_UiM!O}zT$T46H7(zFAxyVdafq$%c^J& zx*yrK1Y=(>{!3e}X%$I)c}!%%Fx~R*eSyY;t_WuZmh|TfEA*j3{~EWj@eCeBA;+R< zyRh}El)qOin+S6M?XZRYWB}(Y;}`8s7TZHN>gGu*$Mn22_$N3(;^u#dHNQ4A{7#$x zkFn<8Veq*q?KC);Wc&t3HcDloFGf3VpqxdaU0 z^0i)CkEz?6lv0rGB68zj?{@P};{rw8sH{I?6(A9rlv%hA9#wM~(kfG8)ipHCVFqM` zz)pcD;K!&UTMfS!+(yKy^L(z?BALn)&Zm4y=CoxA1lG 
m-IpJ}xa9uh!u#=Hj#>YS2G{3+nla==swXv1Bptha=l=s#8SldY literal 0 HcmV?d00001 diff --git a/Project2-Stream-Compaction/src/main.cpp b/Project2-Stream-Compaction/src/main.cpp index 4090eca..e8edc72 100644 --- a/Project2-Stream-Compaction/src/main.cpp +++ b/Project2-Stream-Compaction/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1<<8; // feel free to change the size of array +const int SIZE = 1<<12; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; diff --git a/Project2-Stream-Compaction/stream_compaction/efficient.cu b/Project2-Stream-Compaction/stream_compaction/efficient.cu index 5aaff9a..de2bb61 100644 --- a/Project2-Stream-Compaction/stream_compaction/efficient.cu +++ b/Project2-Stream-Compaction/stream_compaction/efficient.cu @@ -4,7 +4,7 @@ #include "efficient.h" /*! Block size used for CUDA kernel launch*/ -#define blockSize 128 +#define blockSize 1024 namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; diff --git a/Project2-Stream-Compaction/stream_compaction/naive.cu b/Project2-Stream-Compaction/stream_compaction/naive.cu index c81bbe2..03afff3 100644 --- a/Project2-Stream-Compaction/stream_compaction/naive.cu +++ b/Project2-Stream-Compaction/stream_compaction/naive.cu @@ -4,7 +4,7 @@ #include "naive.h" /*! 
Block size used for CUDA kernel launch*/ -#define blockSize 128 +#define blockSize 1024 int *dev_A; int *dev_B; From 9d71c38ce29c69ad0c508888238291d4f5a2f72e Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:50:33 -0700 Subject: [PATCH 46/48] Update README.md --- Project2-Stream-Compaction/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index ab4b98d..9c28843 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -15,6 +15,7 @@ ________________________________________________________________________________ * Work Efficient: 128 ### Runtime vs Array Size (BLOCKSIZE = 128) + ![](img/graph2.png) ## Questions **Can you find the performance bottlenecks? Is it memory I/O? Computation? Is it different for each implementation?** From 6de1e95dae2d4dc63c98731a5e0cf08610329a89 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 22 Sep 2019 20:53:02 -0700 Subject: [PATCH 47/48] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5815fed..87eacf8 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,6 @@ Dhruv Karthik: [LinkedIn](https://www.linkedin.com/in/dhruv_karthik/) Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GM, GTX 2070 - Compute Capability 7.5 -* Stream Compaction -* Character Recognition +* [Stream Compaction](https://github.com/botforge/Project2-Number-Algorithms/tree/master/Project2-Stream-Compaction) +* [Character Recognition](https://github.com/botforge/Project2-Number-Algorithms/tree/master/Project2-Character-Recognition) From d2a288b1836cb06fb743a83890c0364ee3c773a5 Mon Sep 17 00:00:00 2001 From: Dhruv Karthik Date: Sun, 13 Oct 2019 19:50:31 -0400 Subject: [PATCH 48/48] Update README.md --- Project2-Stream-Compaction/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/Project2-Stream-Compaction/README.md b/Project2-Stream-Compaction/README.md index 9c28843..a713137 100644 --- a/Project2-Stream-Compaction/README.md +++ b/Project2-Stream-Compaction/README.md @@ -7,6 +7,8 @@ Tested on: Windows 10 Home, Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz, 16GM, GTX 2 ____________________________________________________________________________________ ![Developer](https://img.shields.io/badge/Developer-Dhruv-0f97ff.svg?style=flat) ![CUDA 10.1](https://img.shields.io/badge/CUDA-10.1-yellow.svg) ![Built](https://img.shields.io/appveyor/ci/gruntjs/grunt.svg) ![Issues](https://img.shields.io/badge/issues-none-green.svg) ____________________________________________________________________________________ +## Intro +At a high level, stream compaction involves removing zeros from an array of ones. We outline results for three different ways of performing this task as follows. ## Performance Analysis ### Runtime vs Blocksize (ARRAY SIZE = 4096, 1<<12) ![](img/runtimevsblocksize.png)