From 2b2d153f1f1a3618eb9b20ad50285cf3534e9d85 Mon Sep 17 00:00:00 2001 From: shrekshao Date: Sun, 13 Sep 2015 20:08:58 -0400 Subject: [PATCH 1/2] finish+sipmle radix --- src/main.cpp | 173 +++++++++++++++- stream_compaction/common.cu | 72 +++++-- stream_compaction/common.h | 14 ++ stream_compaction/cpu.cu | 125 +++++++++++- stream_compaction/cpu.h | 5 + stream_compaction/efficient.cu | 348 ++++++++++++++++++++++++++++++--- stream_compaction/efficient.h | 4 +- stream_compaction/naive.cu | 95 ++++++++- stream_compaction/thrust.cu | 2 +- 9 files changed, 777 insertions(+), 61 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 7308451..ec0c654 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -5,19 +5,85 @@ * @date 2015 * @copyright University of Pennsylvania */ +#include +#include +#include + +#include +#include + +#include #include +#include #include #include #include #include #include "testing_helpers.hpp" + +cudaEvent_t beginEvent; +cudaEvent_t endEvent; + +std::ofstream of; + +void cudaRecordEndAndPrint() +{ + cudaEventRecord(endEvent,0); + cudaEventSynchronize( endEvent ); + float ms; + cudaEventElapsedTime(&ms,beginEvent,endEvent); + printf("time:%f\n",ms); + + of << "," <(end - start).count()); + //fclose(fp); + of<<'\n'; + of.close(); + + return 0; } diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index fe872d4..a8f9e94 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -1,3 +1,5 @@ +#include +#include #include "common.h" void checkCUDAErrorFn(const char *msg, const char *file, int line) { @@ -18,22 +20,62 @@ void checkCUDAErrorFn(const char *msg, const char *file, int line) { namespace StreamCompaction { namespace Common { -/** - * Maps an array to an array of 0s and 1s for stream compaction. Elements - * which map to 0 will be removed, and elements which map to 1 will be kept. - */ -__global__ void kernMapToBoolean(int n, int *bools, const int *idata) { - // TODO -} + __global__ void kernZeroArray(int n, int * data) + { + int k = threadIdx.x + blockDim.x * blockIdx.x; + if(k < n) + { + data[k] = 0; + } + } -/** - * Performs scatter on an array. That is, for each element in idata, - * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. - */ -__global__ void kernScatter(int n, int *odata, - const int *idata, const int *bools, const int *indices) { - // TODO -} + + + __global__ void kernInclusive2Exclusive(int n, int * exclusive, const int * inclusive) + { + int k = threadIdx.x + blockDim.x * blockIdx.x; + if( k < n) + { + if(k == 0) + { + exclusive[k] = IDENTITY; + } + else + { + exclusive[k] = inclusive[k-1]; + } + } + } + + + + /** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. + */ + __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { + int k = threadIdx.x + blockDim.x * blockIdx.x; + if( k < n ) + { + bools[k] = idata[k] != 0 ? 1 : 0; + } + } + + /** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ + __global__ void kernScatter(int n, int *odata, + const int *idata, const int *bools, const int *indices) { + int k = threadIdx.x + blockDim.x * blockIdx.x; + if( k < n ) + { + if(bools[k] == 1) + { + odata[ indices[k] ] = idata[k]; + } + } + } } } diff --git a/stream_compaction/common.h b/stream_compaction/common.h index 4f52663..bd7afe1 100644 --- a/stream_compaction/common.h +++ b/stream_compaction/common.h @@ -1,5 +1,8 @@ #pragma once +#include +#include + #include #include #include @@ -7,6 +10,13 @@ #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) +#define IDENTITY (0) + + + + +const int blockSize = 192; + /** * Check for CUDA errors; print and exit if there was a problem. */ @@ -27,6 +37,10 @@ inline int ilog2ceil(int x) { namespace StreamCompaction { namespace Common { + __global__ void kernZeroArray(int n, int * data); + + __global__ void kernInclusive2Exclusive(int n, int * exclusive, const int * inclusive); + __global__ void kernMapToBoolean(int n, int *bools, const int *idata); __global__ void kernScatter(int n, int *odata, diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index e600c29..9cbaebf 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -8,8 +8,14 @@ namespace CPU { * CPU scan (prefix sum). */ void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); + if(n > 0) + { + odata[0] = 0; + for(int i = 1 ; i < n; i++) + { + odata[i] = idata[i-1] + odata[i-1]; + } + } } /** @@ -18,8 +24,16 @@ void scan(int n, int *odata, const int *idata) { * @returns the number of elements remaining after compaction. */ int compactWithoutScan(int n, int *odata, const int *idata) { - // TODO - return -1; + int r = 0; + for(int i = 0; i < n; i++) + { + if(idata[i] != 0) + { + odata[r] = idata[i]; + r++; + } + } + return r; } /** @@ -28,9 +42,108 @@ int compactWithoutScan(int n, int *odata, const int *idata) { * @returns the number of elements remaining after compaction. */ int compactWithScan(int n, int *odata, const int *idata) { - // TODO - return -1; + int* mapped_ary = new int [n]; + int* scan_ary = new int [n]; + + //map input to 0s and 1s + for(int i = 0; i < n; i++) + { + mapped_ary[i] = (idata[i]!=0) ? 1 : 0; + } + + scan(n,scan_ary,mapped_ary); + + //scatter + for(int i = 0; i < n; i++) + { + if(mapped_ary[i] != 0) + { + odata[ scan_ary[i] ] = idata[i]; + } + } + + int r = scan_ary[n-1] + mapped_ary[n-1]; + delete[] mapped_ary; + delete[] scan_ary; + return r; +} + + + +/** +* CPU simple merge sort +*/ +void merge(int left, int right, int mid,int * data) +{ + int i = left; + int j = mid+1; + int k = left; + + int * odata = new int[right+1]; + + while(i <= mid && j <= right) + { + if(data[i] <= data[j]) + { + odata[k] = data[i]; + i++; + k++; + } + else + { + odata[k] = data[j]; + j++; + k++; + } + } + + while( i <= mid ) + { + odata[k] = data[i]; + i++; + k++; + } + + while( j <= right ) + { + odata[k] = data[j]; + j++; + k++; + } + + for(int i = left; i<=right; i++) + { + data[i] = odata[i]; + } + + delete[] odata; +} + + +void mergeSort(int left, int right, int *odata) +{ + if(left < right) + { + int mid = (left+right)/2; + mergeSort(left,mid,odata); + mergeSort(mid+1,right,odata); + merge(left,right,mid,odata); + } +} + +void mergeLauncher(int left, int right, int *odata, const int *idata) +{ + for(int i=left; i<=right; i++) + { + odata[i] = idata[i]; + } + mergeSort(left,right,odata); } + + + + + } } diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h index 6348bf3..c7c1d10 100644 --- a/stream_compaction/cpu.h +++ b/stream_compaction/cpu.h @@ -7,5 +7,10 @@ namespace CPU { int compactWithoutScan(int n, int *odata, const int *idata); int compactWithScan(int n, int *odata, const int *idata); + + //void merge(int left, int right, int mid,int * odata,const int * idata); + + //void mergeSort(int left, int right, int *odata); + void mergeLauncher(int left, int right, int *odata, const int *idata); } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index b2f739b..b8721ed 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -4,31 +4,333 @@ #include "efficient.h" namespace StreamCompaction { -namespace Efficient { + namespace Efficient { + //const int blockSize = 128; -// TODO: __global__ + -/** - * Performs prefix-sum (aka scan) on idata, storing the result into odata. - */ -void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); -} + __global__ void kernUpSweep(int size, int step, int * data) + { + //step = 2^(d+1) + int k = threadIdx.x + blockDim.x * blockIdx.x; + + if(k < size) + { + if ( k % step == 0 ) + { + data[k + step - 1] += data[k + (step>>1) - 1]; + } + } + + } -/** - * Performs stream compaction on idata, storing the result into odata. - * All zeroes are discarded. - * - * @param n The number of elements in idata. - * @param odata The array into which to store elements. - * @param idata The array of elements to compact. - * @returns The number of elements remaining after compaction. - */ -int compact(int n, int *odata, const int *idata) { - // TODO - return -1; -} + __global__ void kernDownSweep(int size,int step, int * data) + { + //step = 2^(d+1) + int k = threadIdx.x + blockDim.x * blockIdx.x; -} + if(k < size) + { + if ( k % step == 0 ) + { + int left_child = data[k + (step>>1) - 1]; + data[k + (step>>1) - 1] = data[k + step - 1]; + data[k + step - 1] += left_child; + } + } + } + + + __global__ void kernSetRootZero(int rootId, int * data) + { + int k = threadIdx.x + blockDim.x * blockIdx.x; + if(k == rootId) + { + data[k] = 0; + } + } + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int *odata, const int *idata,bool is_dev_data) { + //if using device data directly + + + int * dev_data; + + int ceil_log2n = ilog2ceil(n); + int size = 1 << ceil_log2n; + + dim3 fullBlocksPerGrid((size + blockSize - 1) / blockSize); + + + cudaMalloc((void**)&dev_data, size * sizeof(int)); + checkCUDAError("cudaMalloc dev_data failed"); + Common::kernZeroArray<<< fullBlocksPerGrid, blockSize>>>(size, dev_data); + if(!is_dev_data) + { + //host data + cudaMemcpy(dev_data,idata, n * sizeof(int),cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy from data to dev_data failed"); + } + else + { + cudaMemcpy(dev_data,idata, n * sizeof(int),cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy from data to dev_data failed"); + } + cudaDeviceSynchronize(); + + //UpSweep + for(int d = 0 ; d < ceil_log2n - 1 ; d++) + { + kernUpSweep<<< fullBlocksPerGrid, blockSize>>> (size, 1<<(d+1) , dev_data); + cudaDeviceSynchronize(); + } + + kernSetRootZero<<< fullBlocksPerGrid, blockSize>>> ( size - 1 , dev_data); + cudaDeviceSynchronize(); + + for(int d = ceil_log2n - 1 ; d >= 0 ; d--) + { + kernDownSweep<<< fullBlocksPerGrid, blockSize>>> (size, 1<<(d+1) , dev_data); + cudaDeviceSynchronize(); + } + + + if(!is_dev_data) + { + cudaMemcpy(odata,dev_data,n * sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy from dev_data to odata failed"); + } + else + { + cudaMemcpy(odata,dev_data,n * sizeof(int),cudaMemcpyDeviceToDevice); + checkCUDAError("cudaMemcpy from dev_data to odata failed"); + } + cudaFree(dev_data); + } + + /** + * Performs stream compaction on idata, storing the result into odata. + * All zeroes are discarded. + * + * @param n The number of elements in idata. + * @param odata The array into which to store elements. + * @param idata The array of elements to compact. + * @returns The number of elements remaining after compaction. + */ + int compact(int n, int *odata, const int *idata) { + int hos_scans; + int hos_bools; + int * dev_bools; + int * dev_scans; + int * dev_idata; + int * dev_odata; + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + cudaMalloc((void**)&dev_bools, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_bools failed"); + cudaMalloc((void**)&dev_scans, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_scans failed"); + cudaMalloc((void**)&dev_idata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_idata failed"); + cudaMalloc((void**)&dev_odata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed"); + + cudaMemcpy(dev_idata,idata, n * sizeof(int),cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy from data to dev_data failed"); + cudaDeviceSynchronize(); + + Common::kernMapToBoolean<<< fullBlocksPerGrid, blockSize>>> ( n , dev_bools, dev_idata ); + cudaDeviceSynchronize(); + + //cudaMemcpy(hos_bools,dev_bools, n * sizeof(int),cudaMemcpyDeviceToHost); + //checkCUDAError("cudaMemcpy from data to dev_data failed"); + //cudaDeviceSynchronize(); + + scan(n,dev_scans,dev_bools,true); + + //cudaMemcpy(dev_scans,hos_scans, n * sizeof(int),cudaMemcpyHostToDevice); + //checkCUDAError("cudaMemcpy from hos_scans to dev_scans failed"); + //cudaDeviceSynchronize(); + + Common::kernScatter<<< fullBlocksPerGrid, blockSize>>>(n, dev_odata, + dev_idata, dev_bools, dev_scans); + cudaDeviceSynchronize(); + + cudaMemcpy(odata,dev_odata,n * sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy from dev_odata to odata failed"); + //cudaDeviceSynchronize(); + + cudaMemcpy(&hos_scans,dev_scans+n-1,sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy scans[n-1] failed"); + + cudaMemcpy(&hos_bools,dev_bools+n-1,sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy bools[n-1] failed"); + + cudaDeviceSynchronize(); + + + + cudaFree(dev_idata); + cudaFree(dev_odata); + cudaFree(dev_bools); + cudaFree(dev_scans); + + //int num = hos_scans[n-1] + hos_bools[n-1]; + int num = hos_scans + hos_bools; + //delete[] hos_scans; + //delete[] hos_bools; + + return num; + } + + + + + + + + //Radix sort + + + __global__ void kernGetE(int n, int * odata, const int * idata,int cur_bit) + { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if( index < n) + { + odata[index] = 1 - ( ( idata[index] & (1 << cur_bit ) ) >> cur_bit ); + } + } + + __global__ void kernGetK(int n, int* t, const int * f, const int totalFalses) + { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if( index < n) + { + t[index] = index - f[index] + totalFalses; + } + } + + __global__ void kernRadixScatter(int n, int * odata,const int * idata, const int * e, const int * t, const int * f) + { + int index = threadIdx.x + blockDim.x * blockIdx.x; + if( index < n) + { + odata[ (e[index]==0) ? t[index] : f[index] ] = idata[index] ; + } + } + + + + int * dev_i; + int * dev_o; + int * dev_e; // dev_e[i] = 1 - dev_idata[i].cur_bit + int * dev_f; // exclusive scan of dev_e, id if false + int * dev_t; // i ¨Cf[i] + totalFalses, id if true + + + + + void radixSort(int n, int *dev_odata, const int *dev_idata, int cur_bit) + { + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + // get e + kernGetE<<< fullBlocksPerGrid, blockSize>>>(n,dev_e,dev_idata,cur_bit); + cudaDeviceSynchronize(); + + scan(n,dev_f,dev_e,true); + int totalFalses; + int last_e; + cudaMemcpy(&last_e,dev_e+n-1,sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy dev_e[n-1] failed"); + cudaMemcpy(&totalFalses,dev_f+n-1,sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy dev_f[n-1] failed"); + totalFalses += last_e; + + //get t + kernGetK<<< fullBlocksPerGrid, blockSize>>>(n,dev_t,dev_f,totalFalses); + + //scatter + kernRadixScatter<<< fullBlocksPerGrid, blockSize>>>(n,dev_odata,dev_idata,dev_e,dev_t,dev_f); + } + + + + //wrapper + void radixSortLauncher(int n, int *odata, const int *idata, int msb,int lsb) + { + //simple version + //no split, no merge, no shared memory + + //split + + + //sort + //for each split + + + cudaMalloc((void**)&dev_i, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_i failed"); + cudaMalloc((void**)&dev_o, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_o failed"); + cudaMalloc((void**)&dev_e, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_e failed"); + cudaMalloc((void**)&dev_f, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_f failed"); + cudaMalloc((void**)&dev_t, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_t failed"); + + int * dev_cur_i = dev_i; + int * dev_cur_o = dev_o; + /* + if( (msb - lsb) % 2 == 0) + { + dev_cur_i = dev_i; + dev_cur_o = dev_o; + } + else + { + dev_cur_i = dev_o; + dev_cur_o = dev_i; + } + */ + + cudaMemcpy(dev_cur_i,idata,n*sizeof(int),cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy from idata to dev_cur_i failed"); + + + for(int i = lsb; i <= msb; i++) + { + radixSort(n,dev_cur_o,dev_cur_i,i); + + int * tmp = dev_cur_i; + dev_cur_i = dev_cur_o; + dev_cur_o = tmp; + } + + + //merge + + + //////// + + cudaMemcpy(odata,dev_cur_i,n*sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy from dev_cur_o to odata failed"); + + + cudaFree(dev_i); + cudaFree(dev_o); + cudaFree(dev_e); + cudaFree(dev_f); + cudaFree(dev_t); + } + + + + + + + } } diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index 395ba10..ac5c1cd 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -2,8 +2,10 @@ namespace StreamCompaction { namespace Efficient { - void scan(int n, int *odata, const int *idata); + void scan(int n, int *odata, const int *idata,bool is_dev_data=false); int compact(int n, int *odata, const int *idata); + + void radixSortLauncher(int n, int *odata, const int *idata, int msb,int lsb); } } diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 3d86b60..e5940c3 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -5,16 +5,95 @@ namespace StreamCompaction { namespace Naive { + //const int blockSize = 128; -// TODO: __global__ + int* dev_odata; + int* dev_idata; + //int* dev_tdata; //temp transfer one -/** - * Performs prefix-sum (aka scan) on idata, storing the result into odata. - */ -void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); -} + + __global__ void kernWriteOneSum(int n,int threshold, int* odata, const int* idata) + { + //threshold ... 2^(d-1) + int k = threadIdx.x + blockDim.x * blockIdx.x; + if( k < n ) + { + if( k >= threshold ) + { + odata[k] = idata[k - threshold] + idata[k]; + } + else + { + odata[k] = idata[k]; + } + } + } + + + + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int *odata, const int *idata) { + //naive parrellel scan + int ceil_log2n = ilog2ceil(n); + + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + + cudaMalloc((void**)&dev_idata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_idata failed"); + + cudaMalloc((void**)&dev_odata, n * sizeof(int)); + checkCUDAError("cudaMalloc dev_odata failed"); + + + int* cur_out = dev_odata; + int* cur_in = dev_idata; + /* + //make sure the last write to idata (before inclusive 2 exclusive) + if(ceil_log2n % 2 == 0) + { + cur_out = dev_odata; + cur_in = dev_idata; + } + else + { + cur_out = dev_idata; + cur_in = dev_odata; + } + */ + + cudaMemcpy(cur_in,idata,n*sizeof(int),cudaMemcpyHostToDevice); + checkCUDAError("cudaMemcpy from idata to cur_in failed"); + + + cudaDeviceSynchronize(); + + + + for (int d = 1; d <= ceil_log2n ; d++) + { + kernWriteOneSum<<< fullBlocksPerGrid, blockSize>>> (n , 1<<(d-1) , cur_out, cur_in); + + int* tmp_p = cur_out; + cur_out = cur_in; + cur_in = tmp_p; + + + cudaDeviceSynchronize(); + } + + Common::kernInclusive2Exclusive<<< fullBlocksPerGrid, blockSize>>>(n,cur_out,cur_in); + + cudaMemcpy(odata,cur_out,n*sizeof(int),cudaMemcpyDeviceToHost); + checkCUDAError("cudaMemcpy from dev_odata to odata failed"); + + cudaFree(dev_idata); + cudaFree(dev_odata); + + } } } diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index d8dbb32..856a428 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -15,7 +15,7 @@ namespace Thrust { void scan(int n, int *odata, const int *idata) { // TODO use `thrust::exclusive_scan` // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(idata, idata + n, odata); } } From 48ee5718dea135673dedf1dd17f8926cad094c88 Mon Sep 17 00:00:00 2001 From: shrekshao Date: Sun, 13 Sep 2015 23:11:30 -0400 Subject: [PATCH 2/2] update readme --- README.md | 343 ++++++++++++++++++++------------------------- images/compact.png | Bin 0 -> 15771 bytes images/radix.png | Bin 0 -> 12050 bytes images/scan.png | Bin 0 -> 16103 bytes src/main.cpp | 16 +-- 5 files changed, 162 insertions(+), 197 deletions(-) create mode 100644 images/compact.png create mode 100644 images/radix.png create mode 100644 images/scan.png diff --git a/README.md b/README.md index a82ea0f..d7dd710 100644 --- a/README.md +++ b/README.md @@ -1,213 +1,178 @@ CUDA Stream Compaction -====================== +================= **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Shuai Shao (Shrek) +* Tested on: Windows 7, i5-3210M @ 2.50GHz 4.00GB, GeForce GT 640M LE (Personal Laptop) + +Intro +--------------------- +This project implements parallel reduction, scan, and sort algorithm, which are building blocks for many algorithms, in cpu approach and gpu approach. The test program is able to generate random array of integers and test the correctness of these implementation, and make a comparison in terms of execution time. cpu time is currently measured by `clock_t`, while the gpu time is recorded via `cudaEvent` + +| | +| ------------- | ------------- | ----------------| +| cpu scan | +| naive scan| +| work-efficient scan| +| thrust scan| +| ------| ----------------|-------| +|cpu compact without scan| +|cpu compact with scan| +|work-efficient compact| + + +I also implemented a simple version of Radix. Due to time limitation, there's no shared memory usage. So no split and merge steps. Only global memory is used. +| | +| ------------- | ------------- | ----------------| +|cpu merge sort| +|radix sort| + + +Rough block size optimization +----------------------------------- + +Testing on a 2^16 array for block size `{64,128,192,256}`. When `blockSize=192` turns out every GPU function cost less time than other blockSizes. Here is some of the comparison (ms): +|-|naive scan| work-efficient scan|thrust scan|work-efficient compact +| ------------- | ------------- | ----------------| +|64|3.12|5.76|0.00128|6.32 +|128|2.38|3.94|0.00131|5.54 +|192|2.11|3.89|0.00128|4.97 +|256|2.90|4.09|0.00131|6.72 + + +Execution Time Analysis +--------------------------------- + +For this part, I have `blockSize = 192` constant. I test different cpu and gpu approaches on different data size. I have included the time for GPU global memory operation such as `cudaMalloc` and `cudaMemcpy`. + ++ Scan: +![scan_table](images/scan.png) + ++ Compaction: +![scan_table](images/compact.png) + +(array size = 2^n (x axis)) +(execution time = y axis) + +The unexpected thing is that my GPU implementation cost much more time than the CPU serial approach. On the other hand, the thrust toolkit function is perfect. +One thing to notice is that all the GPU parallel algorithms here use shared memory instead of global memory I used here. Shared memory accessing speed is >1TB/s while global memory accessing speed is around 150GB/s. In the case of my machine, the 48KB shared memory space per block can store 12k int array at maximum. Turns out the memory accessing speed is the bottle neck for my implementation. On the time line we can also spot this. +Another thing is that when using global memory, as the data size boom, some blocks can not be parallel any more. + +Besides, it is also unexpected that work-efficient scan runs slower than the naive one. My implementation has tried the best to reduce unnecessary memcpy and malloc, i.e. I use two arrays taking turns to be input data and output data by using two points `cur_in` and `cur_out` for the naive scan. I use only one array for work-efficient scan since there's no race on the same level. So basically the problem here is that although work-efficient scan avoid a lot of unnecessary sum operations, the work-efficient scan uses more memory access than the naive approach. The max memory access times per thread for naive scan is 3, while for work-efficient scan, the number is 3 for up-sweeping, and 5 for down-sweeping. Without cache, this is really time-consuming. + + +But when I check the timeline for thrust, I cannot find function calls but only blank. I fail to find the secret of thrust at present. + + + + +Output Sample +--------------------------- +``` +ArraySize:2^(16), 65536 +BlockSize:192 -### (TODO: Your README) +**************** +** SCAN TESTS ** +**************** + [ 38 19 38 37 5 47 15 35 0 12 3 0 42 ... 35 0 ] +==== cpu scan, power-of-two ==== +time:1.000000 + [ 0 38 57 95 132 137 184 199 234 234 246 249 249 ... 1604374 1604409 ] +==== cpu scan, non-power-of-two ==== +time:0.000000 + [ 0 38 57 95 132 137 184 199 234 234 246 249 249 ... 1604305 1604316 ] + passed +==== naive scan, power-of-two ==== +time:2.123680 + passed +==== naive scan, non-power-of-two ==== +time:2.108992 + passed +==== work-efficient scan, power-of-two ==== +time:3.803328 + passed +==== work-efficient scan, non-power-of-two ==== +time:3.889184 + passed +==== thrust scan, power-of-two ==== +time:0.001312 + passed +==== thrust scan, non-power-of-two ==== +time:0.001280 + passed -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 2 3 2 1 3 1 1 1 2 0 1 0 2 ... 1 0 ] +==== cpu compact without scan, power-of-two ==== +time:0.000000 + [ 2 3 2 1 3 1 1 1 2 1 2 1 1 ... 1 1 ] + passed +==== cpu compact without scan, non-power-of-two ==== +time:0.000000 + [ 2 3 2 1 3 1 1 1 2 1 2 1 1 ... 3 1 ] + passed +==== cpu compact with scan ==== +time:1.000000 + [ 2 3 2 1 3 1 1 1 2 1 2 1 1 ... 1 1 ] + passed +==== work-efficient compact, power-of-two ==== +time:4.982400 + [ 2 3 2 1 3 1 1 1 2 1 2 1 1 ... 1 1 ] + passed +==== work-efficient compact, non-power-of-two ==== +time:4.974688 + passed -Instructions (delete me) -======================== +***************************** +** SIMPLE RADIX SORT TESTS ** +***************************** + [ 38 99 29 24 92 113 110 27 36 5 11 33 126 ... 99 0 ] +==== cpu sort, power-of-two ==== +time:20.000000 + [ 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 126 126 ] +==== radix sort, power-of-two ==== +time:30.557344 + [ 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 126 126 ] + passed -This is due Sunday, September 13 at midnight. +``` -**Summary:** In this project, you'll implement GPU stream compaction in CUDA, -from scratch. This algorithm is widely used, and will be important for -accelerating your path tracer project. -Your stream compaction implementations in this project will simply remove `0`s -from an array of `int`s. In the path tracer, you will remove terminated paths -from an array of rays. -In addition to being useful for your path tracer, this project is meant to -reorient your algorithmic thinking to the way of the GPU. On GPUs, many -algorithms can benefit from massive parallelism and, in particular, data -parallelism: executing the same code many times simultaneously with different -data. -You'll implement a few different versions of the *Scan* (*Prefix Sum*) -algorithm. First, you'll implement a CPU version of the algorithm to reinforce -your understanding. Then, you'll write a few GPU implementations: "naive" and -"work-efficient." Finally, you'll use some of these to implement GPU stream -compaction. -**Algorithm overview & details:** There are two primary references for details -on the implementation of scan and stream compaction. -* The [slides on Parallel Algorithms](https://github.com/CIS565-Fall-2015/cis565-fall-2015.github.io/raw/master/lectures/2-Parallel-Algorithms.pptx) - for Scan, Stream Compaction, and Work-Efficient Parallel Scan. -* GPU Gems 3, Chapter 39 - [Parallel Prefix Sum (Scan) with CUDA](http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html). -Your GPU stream compaction implementation will live inside of the -`stream_compaction` subproject. This way, you will be able to easily copy it -over for use in your GPU path tracer. +Extra: Radix Sort +-------------------------- +To enable Radix Sort, you need to uncomment this macro define +>//#define RADIX_SORT_TEST -## Part 0: The Usual +Due to time limitation, there's no shared memory usage. So no split and bitonic merge steps. Only global memory is used. I used a CPU Merge sort to make a comparison and do correctness checking. The range of the random number is linear to the size of the array. -This project (and all other CUDA projects in this course) requires an NVIDIA -graphics card with CUDA capability. Any card with Compute Capability 2.0 -(`sm_20`) or greater will work. Check your GPU on this -[compatibility table](https://developer.nvidia.com/cuda-gpus). -If you do not have a personal machine with these specs, you may use those -computers in the Moore 100B/C which have supported GPUs. +(time ms) +|n | cpu merge sort | gpu simple radix | +| ------------- | ------------- | ------- | +|15|11| 54.683 +|16 |21 |71.0747 +|17 |56 |95.9559 +|18 |102 |166.257 +|19 |1200 |281.074 +|20 |3793 |532.041 +|21 |10227 |1018.44 + + +![scan_table](images/radix.png) + +Turns out even with global memory access and take in account malloc and memcpy, the gpu approach still shows its power after n >=18. With split and shared memory, radix sort must be able to make an impact. -**HOWEVER**: If you need to use the lab computer for your development, you will -not presently be able to do GPU performance profiling. This will be very -important for debugging performance bottlenecks in your program. -### Useful existing code -* `stream_compaction/common.h` - * `checkCUDAError` macro: checks for CUDA errors and exits if there were any. - * `ilog2ceil(x)`: computes the ceiling of log2(x), as an integer. -* `main.cpp` - * Some testing code for your implementations. -## Part 1: CPU Scan & Stream Compaction -This stream compaction method will remove `0`s from an array of `int`s. - -In `stream_compaction/cpu.cu`, implement: - -* `StreamCompaction::CPU::scan`: compute an exclusive prefix sum. -* `StreamCompaction::CPU::compactWithoutScan`: stream compaction without using - the `scan` function. -* `StreamCompaction::CPU::compactWithScan`: stream compaction using the `scan` - function. Map the input array to an array of 0s and 1s, scan it, and use - scatter to produce the output. You will need a **CPU** scatter implementation - for this (see slides or GPU Gems chapter for an explanation). - -These implementations should only be a few lines long. - - -## Part 2: Naive GPU Scan Algorithm - -In `stream_compaction/naive.cu`, implement `StreamCompaction::Naive::scan` - -This uses the "Naive" algorithm from GPU Gems 3, Section 39.2.1. We haven't yet -taught shared memory, and you **shouldn't use it yet**. Example 39-1 uses -shared memory, but is limited to operating on very small arrays! Instead, write -this using global memory only. As a result of this, you will have to do -`ilog2ceil(n)` separate kernel invocations. - -Beware of errors in Example 39-1 in the book; both the pseudocode and the CUDA -code in the online version of Chapter 39 are known to have a few small errors -(in superscripting, missing braces, bad indentation, etc.) - -Since the parallel scan algorithm operates on a binary tree structure, it works -best with arrays with power-of-two length. Make sure your implementation works -on non-power-of-two sized arrays (see `ilog2ceil`). This requires extra memory -- your intermediate array sizes will need to be rounded to the next power of -two. - - -## Part 3: Work-Efficient GPU Scan & Stream Compaction - -### 3.1. Scan - -In `stream_compaction/efficient.cu`, implement -`StreamCompaction::Efficient::scan` - -All of the text in Part 2 applies. - -* This uses the "Work-Efficient" algorithm from GPU Gems 3, Section 39.2.2. -* Beware of errors in Example 39-2. -* Test non-power-of-two sized arrays. - -### 3.2. Stream Compaction - -This stream compaction method will remove `0`s from an array of `int`s. - -In `stream_compaction/efficient.cu`, implement -`StreamCompaction::Efficient::compact` - -For compaction, you will also need to implement the scatter algorithm presented -in the slides and the GPU Gems chapter. - -In `stream_compaction/common.cu`, implement these for use in `compact`: - -* `StreamCompaction::Common::kernMapToBoolean` -* `StreamCompaction::Common::kernScatter` - - -## Part 4: Using Thrust's Implementation - -In `stream_compaction/thrust.cu`, implement: - -* `StreamCompaction::Thrust::scan` - -This should be a very short function which wraps a call to the Thrust library -function `thrust::exclusive_scan(first, last, result)`. - -To measure timing, be sure to exclude memory operations by passing -`exclusive_scan` a `thrust::device_vector` (which is already allocated on the -GPU). You can create a `thrust::device_vector` by creating a -`thrust::host_vector` from the given pointer, then casting it. - - -## Part 5: Radix Sort (Extra Credit) (+10) - -Add an additional module to the `stream_compaction` subproject. Implement radix -sort using one of your scan implementations. Add tests to check its correctness. - - -## Write-up - -1. Update all of the TODOs at the top of this README. -2. Add a description of this project including a list of its features. -3. Add your performance analysis (see below). - -All extra credit features must be documented in your README, explaining its -value (with performance comparison, if applicable!) and showing an example how -it works. For radix sort, show how it is called and an example of its output. - -Always profile with Release mode builds and run without debugging. - -### Questions - -* Roughly optimize the block sizes of each of your implementations for minimal - run time on your GPU. - * (You shouldn't compare unoptimized implementations to each other!) - -* Compare all of these GPU Scan implementations (Naive, Work-Efficient, and - Thrust) to the serial CPU version of Scan. Plot a graph of the comparison - (with array size on the independent axis). - * You should use CUDA events for timing. Be sure **not** to include any - explicit memory operations in your performance measurements, for - comparability. - * To guess at what might be happening inside the Thrust implementation, take - a look at the Nsight timeline for its execution. - -* Write a brief explanation of the phenomena you see here. - * Can you find the performance bottlenecks? Is it memory I/O? Computation? Is - it different for each implementation? - -* Paste the output of the test program into a triple-backtick block in your - README. - * If you add your own tests (e.g. for radix sort or to test additional corner - cases), be sure to mention it explicitly. - -These questions should help guide you in performance analysis on future -assignments, as well. - -## Submit - -If you have modified any of the `CMakeLists.txt` files at all (aside from the -list of `SOURCE_FILES`), you must test that your project can build in Moore -100B/C. Beware of any build issues discussed on the Google Group. - -1. Open a GitHub pull request so that we can see that you have finished. - The title should be "Submission: YOUR NAME". -2. Send an email to the TA (gmail: kainino1+cis565@) with: - * **Subject**: in the form of `[CIS565] Project 2: PENNKEY` - * Direct link to your pull request on GitHub - * In the form of a grade (0-100+) with comments, evaluate your own - performance on the project. - * Feedback on the project itself, if any. diff --git a/images/compact.png b/images/compact.png new file mode 100644 index 0000000000000000000000000000000000000000..54e12eb59e3f9f0d4e693b034225ae7e5cb4eadb GIT binary patch literal 15771 zcmdUWcT`hduy1T&K@d<8snV4uMUd_X5l|2;D4k${fPnN~RisO=p?-9cCLmpi^cFxs zN+9%JBfXQi4|>#qCWKX1K1UR;WtoU_mD*}wVC%%0i)PgN8yU%Yt{27_IGqzHcw zgPmr9!A>=uKMk%lEjikOA0$X61vrcd{rR^#D*{}(V5O*O3xkn2Kz~m~ag)=*V9c;b z@PE{t5>`fB?dS&F`S+PlQL&w0JM~Vr*JY5|5FMG(Lc*@_(MY}Yv%wtK_mwwTBjN6p zUXCTb9_|67A-rMYR~<<`v5qIEF9*1vyAE;4M_uJfEEV`7k{=4UXHzUxR4g(Kh(22z zuKdjH^!TN-yD%r>;PjPKTme^&?+X3c+3@Y}zY@DQvDdg7Cmp;ax0<*k@g25gt8isu+aD!WRq!0(eziXyyjpA*Qp>;t~g=st(tiU;!YQ2Jig5OO${mzTHZSv0(oOV^=)*UrxF3gwcVf%6#{>_!-?uC4(&S)dxzzr5dzvZ`&bPAefRDP_=TCdfeZ#ae_jAQ ze_9s%|L5h$G!SU6G~+@@RUI#@(O%m2$W$xO)3+pnF<*PgV!J)INf!K3MQ@SXP7WaG zo!YNCicTL^czP>UsrDr>_0Rx|ZPl+$O{w-DEFe{c2CVLLXiiPY2K`G1DJ4BsLhe+* z2I3tAMZG5-aF^-`c>2@yJbvtTJ8fvlNZSF4F@slMUS@H7Up@_V`k-xrHi;lW?V|4k z>G}3Nig@4591aG95z1ABmOH?Ix<2$yOcdT`d8~ftafxY)s4o~^kyj@Kw_j)O&GKZv z0|NbXC4y`S`lkZ+F7%#XHpcg@&j>*_p?EK_7kgGZp4JQc1s$IBCIrB!% z`Q}TcNB}g%wLnr7hpGC>B(lA#kf=Hm)QO73sbo4nolK2|3ca*(PLnPX4E&>5+S5ve z{#NP-ymHB7^w$*@=xuc!9+WQFlgC(A_o-ZvC~{!h%+?3u05)zVV z9YD6>MNY@WkIh*3CX`>xf}M+G5@3=Vk%cW_)BFgtGq86vPdYj;hdiMSdmbht;5N4( zhj|BmS%8=bB2WuilpV}>r-{S|X$#)H`(yU>uA6iz6E)^zGca`hph_8rTQ^x#FF%ZH zW*kl!&*L9>GwV>Lt)-=jw98)Ce+dT5`33}PU=*lNx3{-{ z`RWzlUfEw7i0OC}oIFX7f#yH8g22yL&Tz7{6D0T5oKqztq5}cVDxUV-o(g>blZVnh z$3h!`=R;XAL=bGn-9LoeZ-72w=VoeGU##SO$aHE%+`J=hdCIX0HL9r8x?Tx?ukLN zaqJ5VL59YbqiC4lMPv9D8Uxm~e^`sN`sjDjMyT&jhK3P@-0fJ~T8ZlZVaF`21UHQ;VitX|Y;!VHhOa~v`s`oJ zFif}ltI+f!pwRcR#@OkT8DA6!WbuIO=vd!MTp12bGfAG^K@5JPdT?i^@L!E81C2g5 z{KGe`aMRvDN)l!XtB^*7mUO=%fx#q~Hb{-@DOv8cbg1P-B4dOoAhu((*jCDKJ~8G; zQ~9d0oT%B87oQdFP`((0o;dlX_LuTU2H`Y_^dj5LQ71Db{cC~GWye;i?wo_c7&T6m z`ut|>AH-;UM{)q|!ko&Sjy-{XV86NVGcYp7v!vQ(q^jjyuSyvE09YneD!{dK8q z7>9*`0Hm5C=z-e|0`<_l4GEoJ;iP|P=j`d}eYy*5#*zWHQ z4OYeo#OhfRUD&22t|w=q3%Oa(~OJr|LTRh8t`q)n<}wG8M=m^R53bnBpC-rd?Rgu z5@@CBFOi}iJN;Q&!4juNYJFFB1PpPgMN?r8X2P+%;Da;Rd^HH5*Cnh7Y5 ziDI-sP9UuckjT|l!i<89>_(70%le)hKB+_SI|!MU+`q1zhCR@~Lv_A`>KV!gevRJMT(_`qeUhANntR7N{c=JCVvwH>(1GZ_(r}t zX~2M0Bb_Tn_vy*wJNohX(73%s>110nR;_e;uqmWSehvb#u zT4FIx$-6Jes{|o89!j$sfTo~P+{}W^Dv41dDQC?-GkIH_Pm>=0uR%U+j98A&r1Y~W-e?Vqa z_#Yq(UJ|Y!g)Eonzgh0f6I1!G_;?91UCR%^o-?)$WKd)g`d*^Gi2j5O-tq9L2t{<^ zunW7@_w5a>E^)Mw4X5+`Z^M<_4q_Ciq4(*E z9?XoKL`|^URZ9^|{%A31@CD#wT2}>yPNabU7ayb9iQ<4HGyD%9V{=p)Z8ieLDS%=Q zW!xVp^PuQ@L99Pe3v84(iO2-r1UtN_;_KU->Bps;&DHk>AhPyEM!)~`F<32#0@F=^ ztrtlleIOMpb|+34-uR>MLl3HxOQr|7!In(CVYRCypDI{V4Mi! zE8q1jjf^rr`1SM{y0L?RD!tp;J@$!FXKUx+vEQp@o89+?K(6fjy;47Jy50#ia=%>+ z#Uijr0xs@6oPE{BPLSNmis(sD+r*SVbReijO{#-ILn5rs?M87-S+>OxG z?9XM@Cl2^(x`rIi3D~xse0~Bi%UsmCRR2PVlBH+H@RfNBO%;6-w5V{0*|tN;*~Ng= zbaXQoT1UU^rD#tOUxZsQ3Xwv%f@)(`S13hPjBgoSdAt z_VCH=-kmz=u_#=TbOD!)5*~TBNCz9+G0UnD4Ch~1UdC70avo4qR*bCfvY)ym6VC-&#sT zs(-uqA89f00baE~LAosxpZFiK?jhjt&`A0LX;hG|cEBB`F=h*a5094?7b^$EI+h(= zfy3BAK;(Tr&>KpCqr4vzQA$)Cr0P>ie=wh?v{?LWsLvuLYXl0N6*8pCl2}8 zfXjQHJ9Rd9H6;AOvO+HV!d7j`o->wRYb$!o85zTRAK3bXNjQrw z{t@;IcZBDk?8x74A=A^%#s{QeGhh*sF%jnhBHuNLRa+^}=JR?lv_o18<<_ zo6rEvNa9e*j?J*Q(RC|6@u_VAy$RbQxei6!}0_mD(J`(My1Z-Z24&>j3)>YN7F(g}3_tqD#^bK3iT(Tx>&P z4Nl^2r=Ld)ewJLSdKmSuHnal2-wj7)hqp@3#uFWO zwX`>!x}xZan+*SmLh@+j=n zrkOGJ*T%X#%ZNMg6=2$ihIp&W+}1oVREmKq{D?a%TRbIoqwum*81!!6&H% zkMm{B+0WDsyJgM?W%vCbx{EFZ>T&v$9Cn(?J$pqB0)@ z-N~rKCO9`eE1=}6hB7rAKSca`5=3?-?2ZCvW0;tPPWJl%WnC1jJx_=x`f$ zeC4R;FgvdAs|NfO(Iu;`1(-DF`N9VCn_yLckE=L#Bsb)m+Wv#`fn%P{)fLka*D2&> zDjvY!v~9j64vd+UEvpRnv+t(^w~6R_I=(%Amh$%1H(lmY79qG9%<v_4uhFgBi=ij4+w!Y>uIE){4Qo^#?hu=O+!2qgPC;f(Dj+RpyO5b& z!Y&cA*Wcx7wJ}CsybZn=I6f|;Mx22L`X}MYDYV0YultK=%uIl8C)Iw}(ejjmXWLNY zOVT@w6#maiz-{_{sQR!m9R}ozR5*Rq8baz|LYq`myUJ*1yNM~|cJIUNvtX)UO=B$f zn>mgB6eFUJBcJg*h!r-u=G%FcMN9l50;NQ}tG`P)cXA_soCwj)<2UYMB)%4o9&@(K zPl?#W$;7c^$Ctse|8V6QlfoYC0|MG4JLjjEq9~+f{@c^k@ZXItGhE7XTdsZ8@iII* zUl|_`SwPOl@r+0RV5NYJY8q8ptjWaBhQ|8q4xc^a1gjV0w?8s0V#px0+yTKyl|{(c z?W8>R$?Mc_`6OKzT4IjWyQrSU2_6X{B%)j7j$+6}nfc37Ltx%-rOlIy=R6`>a^yBnhU+&HG5k?+nubD{4m}oD`Tey2+!*b!USG2z`aM_oV3VYqKWt$yhA=9IAP}N_W@}sAJasFzva)h$ zNNZH-EKD(0(QuWMQ_73=7PRwv{I!W|^%N2C3m;?YSGl8NC9+J*;~Zg%Ekx57)-WZS zCW|uaunyFp&37PB=)Gp|D&U0>CCf8X@eNW`Li+*jR5HIjY0XxIZ7Xy*BK-Y6$*0wV z-I_!(#MXv7!TbBm&t{#{V2C#A&MJ9=43uW z&E-1zK=ozAD{D?zln`d?6zu#;VsF{Ld_A;>&fu2v9Vu7i*SMK@ls$(XE8fnG(Q`c< zcYMg>Lz5GTmh&aA(BbR1c>9(;b$@@qsYzZ$tM}*6AFl2o>=8d$8wa+QtGh8RH;KXm zFBo!+Iu86c-x^a2m?Thx4ABEt{v={Q1AA~bsb&pxym!yx8-fzy;edftU=;j1-Kmf=nK9rGiFCLJ11P9yO zG21uo#E^Q5sOA2dkPvq-$TD8n*A)0^kP(0hh1z&nudO}|2`TZ!xTK{BE1bY3(3dw1 zeTC8}=0?));xo!X%G1DeJXhMuVkJ(*VM4Kh@T*F%255I>up*L{cYYqy8VwmaqOb?D zSlEM$uW7@ET9%|OL#%x;XWgIgw)M1gpDkf0F!t0tHOd5X;_n7~2YIw) z(4rDD`5CHV#CnSfL=AvPvzKxF{u-aGAo-{Uy$yY!lvPM>&G+D8O(=G=d}k7&RUR1i z_9(3k&LjU6n(_tWsPmj@7nhmm1X`hO6F1p!yGM#3-*fQ?Zrq}IN#vn?bothKDDP=4 z*x3Jl5ObjEIu`8faKmo0e+rNbxvtGu2%d4eEVDS$kDy5Rj%`5b>MrNp+y4B)Pz1E( z{ZmKt=?BS}`k{C|mpr>2nua_DC=XObO+^UF*p;8`b+&a;fz^A#dL|~=4Sl(FT58|W zCV}1-qDn=ruonFACF96vIgtgcLIJMw=!)OVzLpFLH))w!UIu$KGY$7dQaIhaP79*0 z2R$xxn|Z@t)DUjn0bz0>reAL_5x&_BG~l;~LVkI(FqK3=&PGustCB>#fTDPN?hE5+ zk=`^lBZaPw8cz4FKW)`Iz`x8FbO{U+9{Ii{$#({1%JSfNmk)Af{KWD0 z6B^u`xo*Amhz+<`w)ly{>|`DtMU;EWEW|r`#=ya@@$>dh>9Mq7`H!&!!5MX-WRN}| z8JTjdm)0{t8)PaDO*sc`y-_Tl-MUAZotDe8f;DT6ms(AV;7NRv zqqZ|`!%uQ`b?o(fl?wc|L&~39#_wFzOGjT+r&cDQE;PCb~o~VZQV;4w1p0qsZJdP}3lG&|>9Cx0=MQC_V zz)w?iE-pT&@!>%am&lj3h>HHZ5|`7Gb>>SuDOw|?4l*tu)Ua4cQ9*H(^9{y$aey6( z{cxy8LJVAZ-Ca*E{1FqvxXLxEvh3~le$68v?;pt{EcSvKiTCWAjw zq+AUE`Oj&oy)DP9w3VcA=uvVi4 znWmrYzH`PzelwYM1cEZq-f#-=op~toE#Tl4CM}hUR)4)*N-{97L*i5Fa$KR8$q%STh zslqCMX}YV+%_ePb1K~8W!B3#JfNI2w^kTsq%{S&HJ)l&fbi$FCT(@!$L^)cb`J?-N zuEr|FA_H@~54?K=$;VVcJw=?oKs~i7!%Rn8hZh94RF`>)BbI>U+b&b=k2xOB4g^zJ zc=ph3WJz^hay3!~qMujE1fsJDrChSCk)*!Cp30+(16jl)PeqiA%Pc-S5aU9?va37Z zr}sa8@lV+H9!<_pjfU>NRoK7x6pzGC!!#oZ_dP$QCZskUZWM^;kGk3NaC zfz?l=jASj23CVswd}6B!-^j~@i@gl)FYVOqo2(@Jz3i+0_~C`|aj!iXj7j(V^^L3^ z-#{?f(P3cUxrdWFwNQp=b}fM%u|iwzV7$4Q*uB<@o|kF0NbqbOp<#JGGvA(c)U#i` z1rl27oKkjO8++nnbum*y<&|EsOwAZbs%rk#{6W;!91#ew6eg_doUKg||wxWf;XgkHTyGth7d z$-U<4aCeg%jrQHJ6B9gt!+E9VEC+|`QPC6_B42lpZB~_ziFfz_xh2#^xSC_TtOtzB z%5!JL`gU8_m%&itV7+rlbZ$gD?9(R)MBPM-w=GjOVRsxXZ*MOqYPG_7DarZu+y?)v z7)u=6bNd~@74I_G$AT}h7L3FUCvPUX5wHiG(!USn(_*-z0=|q4@6@R2mbg|uAF2oj z_l|`}O7KBrH_3C3%B;ml5E73Xj^1FC+se#c@@E(3wmn5eRT-NZZvhAP#!=@6?Mlt3 zAQd;O#Scr%Go8gTNb;e9#r33N;^rt#j;QpVVj+8w|36o!B9z~?!MW2e(;bg*HWm|- zF%FH?5qx)EHswo6BzYFPRAtLfCK~L3nxGozm68b?nAV)m0Z+(~WSZ8%Pr7Y!D%X_?lEfPL4GtY2wGYqLnF!O#vNe zEl}CfNX)_lS}A|kHcRGk{MWfvS^fkbd(_LV?Jr#xt}a~>7o;aNP6NU+1O<}SopY|$ z5prp@(ul8POk^KX1lIz#LF)_nYLRYNDX=v%xY76W zT(gFE;gRuWs|!*dP3DzK467OP7SjaDq#&&R?HCqp`qhBC?%L zY&-V@?3cI)q`qmQ*@p@P2J+n|#a$PE6^KXE_Y@4G@LSm6tYq9XH7(}H&XA8iZzOz| zAs8D(YK*G4tvQdXUj#`_}&&wPcP8!`v*9q>U0bMHN#~PAY zpSdP4p>J0xSl=%w8d}(xI)IlYkdtPiaF^@CDnQQ!|FDtf^Di%D(!rv1-Dzuo&eh|j zX^Uig^4UtZHhY7~&8~Cr^&DF1y?<*g#*vn{dHFQUS9cj7!<;g=HP31Fz*pDD`lp7rl z1^AfXjwGy3|6B?GTC6&-yg!}`btXV;%!K=?WZ&%>*4-Z;mRX(>{o<8IQPCm#XHgk}P~j7k43)rD&qOS`XK5|Z;9MLY}1 zmdzpAx$s7jsbF309Z5{_<^;>A(v!!2v{%jso?6A2N5+?Aq3XJQugI(R7){aVO3?*> zddP59@Mi2RN_XwXEih})_mcJOO;JxDdche-8<$IFpDCEk`|P&nH#Ri-2Ui(C)%w_F zzD;Xyw`u`Wv3$4>2fxzJSCB+T*YGGEJdLhV)-8XgTP~oa9-XO-7gg3r+UTdmw~J02 zA8;*??Xs$`Di!lp{i~?0_WbG7^uBARmoAn(%sAJ`?&2OsK7!0)_lgyBw4Xl;*{|Tg zy{f<-5uaGQg)l~O6$#NWJ=+X<{N$Ajs>v-2UMa|{PoYpPs_ zg~uJ|C-7!1Y|~4ZeOJd=ugZmmHLkNNsb})xLo>MBdt!`$#q~s8DD(nE6cqHyw?1s* zmJDU#=Noj1=g-NdjQ<*(hu^1PBj5KkacW^AS7?%NnhGhz@JHQ|!-TRw1cIiN{;R02 zmdel7^BR_{An1pFp?k)Q6>~52zGUg*4mBJ4YDlQbcQUlqpvr~72%Zr+yhXqr1)}~8 zJICE4=)VTroz0h0*}PO;&A5HX#-j< z-J}{tZ5^$D`F}0aR90+y52cHB6}si!LnWwWHBS}d%;AQG8eej9g5;CBdN%N-;d7-? zKkEuF;cbWwNd6bh!V>p{#`uM)gW<{ZZ|rP*BqHM&@IQvaDgZZ3cD%B&O1SbngDe3~ zj{chXX<>mDV`=`u5`4eP{jpmh*i-AE==RxYB0*G-Vp@p|69^HRP>I>awP zX%kpQ*NbdH;iaN`OTTEpkmd1(_zhBSY3b%aj!J25jXt=h89uCKkeQa2_T@`J$rFwj zj*g-Pf%;bLQ$rMwLu{LiG(}7I@}?c8D*m8=(Alx@^YE0*3u@Mgnp3N))JLKf+t`o% zg4I^^2&aG#fnx)0=rG|pqIFu_B~ReLIpxkbFgZ@$1N9XK4A`L zIB%@_vAEgz-h0LE2A;050b1IHrFG%Q87%ys43skqHc^iss^oJ>$z^wTwmmmSp|XKv zy7*A5M#R_e+c{@$#NID+eN)N1f~!+ysL-n?y}Nd=&Ql1~uhfF}J+E4;vEhO*(kw%G zkDXK8ps-(wBrGdD{V0v_ccCo`PPg|8rAr`05r4Z{etnUCnGlUMU)vBb_a zg%BV{Uw^;NCEV0xX=ltljkhZt-f+NuZ9}l|NihkAP(2^xFBO4L-gVDF>TzsOAUy+| zSNi#8i&=j*msV8Zrq-US9i4WM8#IGsjzkQGk;a*Gmv%^gnwBhA(_lXG;}6gWp-ub~ zVelPbPjeIXzGkMTytlIe&b*ZOb9T07t8@G-u45x{hvgft@XMDkFH1zy ztnt6tY!(rR4}kton;avINI7@-pt*>L%oDt+E+5UovaqP=#_pn(O2YkGk@Vh$+*eEC zN=~m!wCHZ$92{N-o!0_N!Avrp13veuf@q6z^bwjxS5U%#$ym<1o`F7F%!1_tg8Jq5i`hy1Icsg*&}DYW3p z-MVV7HK6^h-t69m-J|NR28=SjZ~Ze}-HrjDNkr!SPP|Iil$qQbe?;@tgmkG^UUWf7 z9?l%4$`G@^rp`8DBzJWSH<3CMBszL!^62Zd4>J09YE4a&l;zCKjDV7xG*bwHG}um} zW72voL_E<#U!VDnig~>U%Y}Wbaz2g})$@jB!zEg8k$Q3S#q<}}3XMTy7I;1I~-L+D#%M2EGZ#XgCW^P^WNRI@FxHT%*ZE9v#aeTIFgb(jCG(7C> z6yG$(NICG9g+AZ0(@=12(9xQo+}CU6)Op{dqBkZczYmT+NT;45B?qltflx2n;M0p3 z@SyXUy&lyPO8JM6hNyvEQX~Jwxh2%}b{0+ouYVU>)v8R2)|EF$se+{W7A25Ua( zsR!Pu;qWo!m2TdWULiYJM~>b0z+f^myqm56b)6^^FLjnto(h_c}h`FQ`98Ji2; zJ8f_Zq*;m9_Ct=JrT1kqOaXy`6%L_ZuF8WO*I_e$}{g16RN)Re=gLOec6cGgYLy7biXC|g{>*m_n}BwZDq zD=I8p+ZsU7tQv`X9Nv$we(a`N`^gy%mNO=yr_y1KpZef%L;8@KDEQE`#)%#m;EC=` z5UUsIARI2wE5I*N-0}$67;2Pc*%q{Z@y&lA0 z0&ES`dC7|wpni5SkQ)9!KjzS5R5lzRA8&A7Jh5}vyXO}k51Q7&*^m(nnyTn@06Rd^ z*t({srd{vbbrxkz;&vu8WyS<$2b%o1C0bMJMbmZZ-GhTI(kUugF9vx*|8~ber>oc7 zxXmgC7J9QVM=O?`HoX+Hn+v^`fKkoZY^nseX5%eu4pytoD%9oV>bI5#_VSe&N8NWu zM9180XUZ&1Of-tB`pv`vLyi=p5O zzM?oSt$u24_sApfmFsj1Wmd2AWP?BC7)E;Z@BZrV=W`wdJ?=Sui&ppjTJ-OJt-7D6 zV&TmZ@hGdekPZ;{yiDQS!eJ&Um&m;DDJh_g)NEddEc}Y;KWSc~235F?^MGFJHb~zKsmY_yaG|n?c=Oec?OyCud}qG2oPpd|H@as`}v2 zP@Bb5b#--ZZ8{9EgDYlC^^&Dm>=P?hiJf1#6^D@K-e14qP|MKU0n-xG60K7I0fgt# z?%GuMj;Xo1@C`Ea3+kNpc^F``e#j*BeBC6I)~3tHrfM$WwdOXgw;C{F@o{m3FMu}5 zNW5ANk*YMq8~To)Ja?DRO-i2!9;KULRWQ*0s^_G?)bDdx*lbph*@7C> z8S^AFKkaT=&48w@dS8lWH`HDfAO4G2mM3n_(efy5Otfv_qU}b1_2C$CX$~x)ypFtw7gA-1`b@HFf>C@8F zV=T@CRlH&xw&vAf3=QQ&%MX5_y$tJ^U#nkkJ>>)33xSHorfNhyk+1UCylY`m1>tdN z8QlwK@Hw0c8vE8~8Wfxm%lXkSmFbe1Z5PR~uy#v>44pZsHj3ULTiXKlzlG-JV>{3p zqR0Q!G}~s=XY6Pe6a!gPfd^RSPBJLQEjvi;Xsoi&*7#B|p@=2JE8WTR$z9E(WZ+!X zvateK;+@Y1rHvzj_HP@_Ke)@t6G=emr9XCFC94+%V~&n=LGmHZw!g>!$?x2CEnEQw z)~V(YTSSRPnZ&-)TXXYIqjt)#mn8zFzQt}?@wvEdQ%Ex;Cuq)!>0EpAYX6c1<4|!) ztZVwR`qiD^ZziS9wddjEn;kM| zUq-AhOzyi*4amWPo2s7Uz25%*3~A~Bz3v0-MFU2~%`bkRGcqzVGn-JwgrMhogZi+s*uuLT@dJ+#$4baDy~znZ4m;%I+XK z(Jx=Vq!JD9-WXKyKKh}FVi(v6Ixc86gj%RC;+9hSOeyH^|A_vI;j7^7A8GYqZ;YWS z05#I$;)vns-i7akoX#ll^mjc%^&L#X`nrv8eXV=wWKNI=kCKIW<`?$lz3uJod47f9 zgXEfegQhFpp!^3m_u&mbT*#vI%E~xDh}u2X8}Oq0n39r`<*Ja^mL!QP>Mvf@Tht%j z^U!{k6J`J5;OH2167H@eL4S9m3V^01zP!+r=n>L9z%$+`qXL$* z0(~T>*;xaC$zrx@xOLXwTje0l3^0el$ztnszkI2* zo+6@28brIq44FG)hU)$Uy5KW6G$E0?X&$b02g zt6z-MAT&JwDtg^@AUY;ytlS!~Do=$(okTItycj3Rs0w`UA%bbVu)zS67;|WY|6Rv) zTd;`HsbdWky6E(FSsB$^Xtp1f;><6J)SNmOaxHU}?z_^|Qut2z(76$TwlFIFd& zUs)+W{9&fo-PtYAI0!g^1_#H*#l`-b(&*^xr8MlH;dXKE$20E9p3ccv%=%4B08*E> zZ?SAwb(siP>hMLhs_E!pk5Uas%6BR)H$8FggsQFBw}sxhn7|_qKYq(cBwiDM&p9Gm zLEF7`%*$&3DQ?8`s8Jz4+1KwO|zEBrwdvvds>TXo0Za+S=O9d2NuH-qA6py~LAv z{8I7~ro&tWQa53z*<8Hx>$QB-uKr0W?LemT@R`D{I^cXjEFHoik=eU2Xn7R?&Za`c z_?fZs9CyKYs$!uh@B+-S3&^JIXpmS2^+~-l#EQ$Ree*7<&DcFFSEuD|1_p-Zms8SH z+_}6cDQF8|b&ER`;qON~0o)_djYwv*eBNb}dXUoKZrpPneZoQQ=w-Az{Oi~Bno~J3 zfBNVuCU1Y=1)LhSy8lfpPbDjG?5w)#=D+9Z%#Ed#q?0XM34*J7YZBtQwj7!3>`eC9 z%rsvX9J#ADIx~%F2p4Xn=jHW&%P5g%C!g5%*k~p}CDh-zJZ$l`#wv zoGb(PtYJ8jtL-EKY3lX}fnmX$K_p)9=+c0hf#nR$84W4eocI>q#nb0UnPJC96xd;sn4d`M1R#pZ@r@E9F|6Re! zf%;4(D@&i)$ng==InztkNu*x&Lg3`>gCZFV z63~x9<_bED{x43fKITm2R?m-&jO4MEGI)1&V0c(UP>`zlO7hKjGg~`5uGOP<4x?Cb z%lMfyR{Wqf9WJ*ZwGoh;*rdh$9kGN;ilD(kb2DDKJBaA|(PMB2v=Q0uD%vl++;IARyw9 z0@C&EhvWO5bG&}<_t)=uxV(JW&))0V`@Yw@*V-#wRrx*<0Sy5R1|yP_MX1AIXQsi= z6a2H_33`t1HuwwAOyNEPb^`r*QlBn161qdKupjf!Yp}Dp(gbjN#(Vm)a6_pI8>`9N1Ca>ImQl%WPdZO-EVonz@pGlu95G#X zuY_Hb&2*lbEzDdfP`;HTS8e#f_U^+8&qy0&wOK3Lvhn5aTlHaH&5OYnLh39rsF<%8 zaq-Pg1pBC7;R$LTOjG4X`F8%3EZ8Zsalv~Q(%h2g~7;gEEocVvx405U@`D4 z9#|md^^`NPEPN~>j6qt~8U|yP62^halJ#AINk()G1&m5`0iQFW)2e)HYs+#32L?-eXC%Dy>ldAfr{bX-3WYj3 zK0-9hG#ix&#HAWglfYEp4=Gwlw^0y>YAY%Y3=Wn)QO?~wIJ&Y8gME}K5?Y%W9!4JS zu8Te(CB%EwXhNTg4-4d~Gb$Ng;+YWi6ZWZ_auJ*u9BipyURkj`v$%5*C3Ax;MCv-f z3?_>?a{esrMc6MMTA9pVj|2vM{-mP1Iz!UO>FL+u8z@nSBt`FyZFv|hB7C`i80jfU zcR!lmlHv<>WQd0+5?TJOJL^0Q#=m$Bwgk2p`taO;-JTc>45t40zwIQ8?OX%h<-?8y zr_{Z3Yf@TZUnQ~fh)nH(?fxIl%|$0s(3?*`er=gYJq_#F!ka=Dg2@8=<^$#}hu+@t zd+dC(036O6M9YoXyk0fs zIM&cgwt9qkIWp65c$Ky-%M~qz;4W(VFR6n;ceu4Gg+?N#|Z!KMiEH zI#0mT_vI~nM3!oSM|XE^r!EC#RcCWmO=q(H_&0f~X|o}Ky1+iTvhXmy_$PGCfh4f= zLSmi2qp5Fl*b5pZ4-JYnx1mR15IH=x&QZ`sHAnR)hm{|+{&k9lo^qMVw$!is?S9a% zJWw>I*C)gadu!l4Qc)R;6xva%z?g$W4gmEBsAOO$R$`%!Cyv4+r9Dd@J7Bp5wus+)^4Nk z!@B!71b9UE+H~oz!(fT8eUC8fANBf8>*FnK>fAl63=rSkg|8B~g;OvWYOy7KajBW~ z^Ya^htw9WNaoCO~$pj0XwhXMy+p8(2;4bj_i%ZMz3=+->}Dsh|m!afpzqX__wZC zj&tCjCA$!Oo0k`XppOm>dnz-xn3}9f42#M0Dc~cpzMjaQsF>sTam6zd$AZ6Q#zoLq z+}Mt!NY4Y>y1>kN)N)kru>zdtX2wySMBdwy1w%C%;0R!Wv$to20`@|=NYw_5C$Fsv zwbvvo&KTTR&bdasf&5hq`)!n~5dDn|M<3cwK%aRB z7&I*LBtAhmlL0fO90HJ(K^2Y2k+~l(o1;1a2cC`*ej9ZsQI=HALnBT-bH)(U$^gJ* z6ONS1vt@f1!~;P~hXW5sBh#5I)*vt6)^W}d^8|V-s8sK|uQ>AbOi3X-w%y>lxM_m(f7KZTmG} z+F}w29L^K4Z(a$Tl8|E%Z7#38xWuGs3)v&fNO9q$N?W()>+0{9ng@Q^h>~B=h}ie; zPQ8yx*n9)M#K>ga@KE&Kkozm=o4U|KX7A{t&*5>ERm^<3Uf*JI4qET-kG`AyMQT;% z*5GJsJKN8KphNix;6;%M0Q({sLSVG`UdB~X7;+d9k|r*<$)d<198hg-f$_x9_*yxY z*yOsDe20`acIu1v?noqUlnlIVd}YSvRhR>RQoBXRyYnyxBzdn5SZ;_jSOZtu{#gnL zOx;!ws|>8l_qq~!3h$HRCUjbskM8g9b1$EPea`#sdn;BP*@tOS32}V(Ipu?PmN%wV zS+e@EVK)DXT>EuK&P-x^C^DGpr^AxkE*fce?8ljMlrt_gqD-d;fe;#Mm#7Ma2Ox|T$4-(1*S-JAcgio#D&RGtUXrm*OEF_4}mZK z@PNuj&}!98(;psgR}@!O6_!1BzT*X45)4aTQe-Ol;8tfcq#k45hODn+JIxV{+WCa% zi8nZzr#ptcy39)7$sm!Hy*BQ`DL6Pdg>4S}Aa4GPzw+Zauy+^!DKeN&hUi2@Qk0Xr zzq?g+S#4tbb9Hk^9Y7oebdoOb$5P@p&by5|RMgkkzZV0Vlj4I*5TCq@-lHKo>w>`! znR+53B6~+3QPJ9f6MwjwzIWZ!pqOsSWte>9VNp`~a0ywgRM8Nhz$Rztqk|q15+b4t z!5_zfebkXHW~C@m`XLPvmuZAkFl#%xD`#LMZ^IuV6ND=N0;WspR4osgmneN&&+zD| z973xgUdiUe<xxsY6{LNOi;GuRR|iR| zCY|_)uj7CE@+GF-!i?goOs1=cif8A!Ky{auPfLYr!($Z-z4!O1n;%QG&!zT)eh z;NAXQl}NGKK<}3P8ffci-PRS14S<;XQJH!&RZ$mJ+c{(24Nmu~+5W~zgZKdo^q2HEcqbFeg1!O@7Y0iOL|xxA|W#5!H6a;sS$zE?tsIW~RVqlYGW|A&ty2SFN^`ft)aKYiA05B!vA z;^qk?1}515Pz`<17$+vtl2REZ`vR$=@A^*%0Ivg+K89Q@46==ayxGULKQMgX?eytC z^)@5qEPek2L_`KOpb=e&H8X_=Q$?LB^LdKJCQA)XeK8oWin&KmOXt0>6qY2-B#R*P>dzL z`InTSMM~+m{ab1K?ei{?{Y_y)ACOEbMLlc1K?cu%TQa=t^%yDChyJaw9c^q(#Qr0% zht{^df5)|8?!En#xgc~-lP_EBhre0u;o(u3-j6->4|oFb-Q^}qeYy7Y*pqvqyVN&a zcjQ3sIk&l2sAX>yaV8LcsoFhU$^OCWh%VY*V9kxuu`V;%sHC#8a(Lb?T+F{A?J7NC zh?K+AWVxRGV4$=w`@YjY0}C--Tt2ZR8qJJ0s&Wf)zO$DLDC+I^gtjCR1KutuD5$B? zUnavfwXykZ!3`V*oSP;p@s~xV0gT0|Ixie46aukM==$F&W)3MdKQj|1%{r<&TCB11`B^A;SWc=Jt+-4`s#Z1i z&e8b>uoKF(%Tz>tXa(}dc!7Zyy4QroDGt0D!SE8h*3&p`9>PwE$$a<3O{TyZEd2NsypJKA} zugL3m8i$v!m-ahVhBleP%SIiT;#eI?U@wCIM9a_9N=^Upur+^@hDeL56%euv_$etV z4(0%`)y&9joWQJIW3NO$poz5#h00C26O~hh(Z>QLBqVFo4QY;w2%{3DhYEk>IaprA zeYi$uCW29e#V!^z`mWS#At>k!6mE4y=}DII3JT1LQj`~V4*IuYlD&WUTn)mZB&1QJ z9lFdGNZlyii!j-nQx~#xh;SB`MdA?0&y6pN_|pXIQehJmWC5&_I{{lu9-g@h&w5| z8F>K??+e&%iWWZ>T(FA>`A4n`cdIfW4sBZ43^y)&(qd*BwYuu`vZcTW_U6OUQh5)k zRNTn8AB`OH`-Q*e7Sa3~Kq4j_D5dM;+EAGVoq9|gqSsxM0S^MzrmsPYC?oTz*F#!} z)2#Aq5H6>I2~pE^KvpGyosrr-eF5td;~Vf0y&`)jfE2a6!Z;tlD<#*|bXB31iXQ+( zeFmP}WV2arVXW!?2Ws`l01)rwfaEz<7V4RKPIj|~%B9eIOu&)T{?^Q_;?VqAknfi# z25>Ur`NQ`o!=YMFH$EhpUE=cTpb4J*?c&?X04E$q{s{gW$OvwSw*98RghbkLR$RM0k1|&o7XC(0BR}-j~0F(mT;*wW_Mi;cs8494$ z=9X6sAo}FA1_+VTU9oaZhQG3hcn5@u{#4rGEA*1McNS30`YOF(+{YtnW~Nr7U4QOa z257pdlkV1yjopWJD_M5m&_hvpXa85eh;JMBvkgzDevXMkVa)`W(cic~4VZThLdL1KTI* z&7)mnB($|qSahJex!MyDkE?!ZUw^3h z~9!%K#fS&f&T5uAnHqzEs)<|`q5=PrT( zU5yl#BU`cV?gRalwj{+jXY2GQ2B*FAbp(wDFu zRP{HhZvHMIqj0e7zhyHh2>(u#ovL6l9s)gwX1+EtXbF?O3C#BNygte8G7Ij+?K73* zlhe~&{#duFu+Jc!;i{>r(cQ4>!+|}g^55*PR6e{=x>olquSj-ax|~N&Ned5y0rto<9uL;^}O zK|227B@PyFM85Oa{GBxw|E{t9mwwmY`95Do7a zk_jM<-|PZ`GL6DwLaxvst2AhwT-e+ne?$rVPsBvmV1c&sQ5YH>Ey3x#uHd^AL=?Gq@P6(2%_<6zf_Js)whGueoLce9r9DscXI&TrI$MLB%%$x zLc=Twq~`=uM>l?YPEj@DK`ZnFMjb5}S*}c`odmDm>o({NA_9D-O#k}%XVmc}-2*=r zU*A)Nk~o|NwvKewA?G$oj7@UwoyW&Nv~6pGnXI6h`3&c?EZ~;1PI~b!rRW)FDC^e9O%j~OX|NP)xwtNrk6@r|G!`Z;|PQ9-cVjry0bUGtgoafodldh$eU zyJWo>zYJ6kR7Xbmwi3R750lj_@>@={c2zo>1;UBNq#a?le11CVFMtd}tz4h$tC->-Cx#=;@*$#g<>p$UBSG5 zUd!@pY2_95mec3i(|}nPSHVm_+3|49C3eO2vq0HyVX8zTsvjLm0)Fw4F{8J4p}wg& zuVldHoouE~yk&jf_{!>9!^64HFJHdIV3gr*f^_0((P6^E!XO?)VVt^dGgBf7lzG56 zr5#&I`-!bZFfjASU|x`SJRM(Qf`d}~mUm5cH5jkG1LSca`Oa#jUz~F-W{k-A_!00J z8%4Qq&cTQMuljCk{ha=8uA#JcVlj7+?NP81mAy%DPBERRY zb1YsIsha}(2RaAonQ6X1elYJ1X@r?6Y&HePArg&Jx!WYqgRGgI46OzSmS#Hyzk53l+)%-kE_ zigqq~+a3_IG~U(qq}^h1X=&5D4#D{BS<}pITE3E1$d@&f(wbat&qn=V4j{XT7bcsd zuIabBQ!-tzRO!4~tdL=4-f)2vm77ujdA6I8qh!h?vhPg+3Q<5^5x*AG*D^6qm{XE>o^jgbM9wab_F`tv4$l(D0*J$L?v4)8F z(Utlw*$zMJxB|9o-VNzqHOCUQCHrI{28kyG*P8J_NZeo-%$*AG;^tePnK6~9$nfYD+B z(d{B@1(#SEEa6|jQ|6BSFtPdRv@|}jnB#m>ny=Sc^Y)On(bP>3$bj~${qnRY)4p-& zoskADkn~M^Z@g&^ZHKDgS@QzcCs^{GrU#V7;6eV|3N;u zJ@Q=|#U+d@C&m(f99;?U->VDQ6&adZ$M)2M{o5?EzIR&@1C>2hV%gSEC$nv*n&On$ zDoo&4B;Z#9k&PdLqxFaGO*lxLoex`i)I|(MXwni5UnPetV>lHt2%P0lRr%WkhRk8o2K1X^Ly5=p9gVu{h>e_%_ma_+}@TjE*7Rz$thv2*8K0Uq225?rMtB}qO24Q|;!IZ!V^ zMAvkxE`FEH=l~*6N!8&;_zQxq2(_PD9!i?hQF>K;_FQ;wgjm8D=D)nWF{sDMrfaS3 z-|%tq^7TBNw#tZ`&1znjVpuUlqglSmyB_L$2!vE>UB2~TP{`+M4D*L2Hc2Or<{T?( z=(Cr&PRvmodcFm|ayN|gIrVdhvfdfwyv@J;ZrwS4U)g;8rEr!D8HkpmF;y+9?BbWA zdb*_V_3kP<*!uGeS6xmse%jN;@#OoZT=LF!qLrVkIwPvt#d+$(HWdSoMQ`)ihv(n= z$FO%fzzSI_cpEOKB}FOA%<+`&9!{9C(4^@;?e1#Jxn!mhxxN?086~T!TRg6(_Rv~J z$B~p|$g-{mZYA{Om&i>Iq$7$Hm?J8b z;sRN!QBaZJaZ7jXC0oI*&Mmt^EpXBuZj&!J7PnLq6`07KQ+6plmHa~VCVx4e&$z-3 zG6mcA>Bo=U3goM@%M5y+)KTR-g8y$>&QlLo02Ab^EQ_)mBO4{TE9jE&yPKW3y1T&_ zgW@ELQL3}k0c`+8Mwr>m&%Oz|n$KipE2ROB_V?~--3lcSa(@zKie>e$B27|eZ_^P; z)p^>wYS?PZ@69EXxt^F^!*d}V;VaJ1Af<%0<-@vXdj0q54?XV6HAYm!WBb1tJneD( zMPnrt@A>I&>8l6oRc?24Ex?Y-}iYIXhS1A8f*epq$@JTJsb>utQgrX>}X zn^Kjpf^LQ)N6CY(n;jPO5m+5B1Eund8&!ykC-0Nq;%-I&ZOUYSB_As>>}Po$9z}eOUF+N6J$qaWZ5;R~`B<2DzVe zadD_|=e>w8_%;t1CPbeQYoHX5c+V>Ua9|lPs#`&42(m3Ixhl6j2QSuoUJ(UE{<)iE zEjd!{&2G9+zbTZ}64{1>d_C{|Aa_Xc;NRQSg(~qeOoeBsA z)+COs_x&e#WA6~!rtew^rHIi-_bh zw+45Vqm($g`X?K%cYK?lkMNPG?p*ibp`YJ~;2uq(deHuDV`H{XR!t-O={ID<%?i4j zO+)UCjpjOg9;WXLap59oTDaAGWko-zAS_)&e6dqgv$0{jF>XN?a#!pXBzOhR+|z?G zbz((%dEYxviw`;3+Fqfhjgu8#npRtOy=kUV&gzF{ekh=neEs>PlB88pBzxiK<_Aj# zSFK%}JS>SimDW4KDb?;egE`gIS-lo2gEUb?mX^9*-K z(?-A8trb=dW8G(5P)AXl-*XPGX`Ih(!A-xQ`eL^P`B$IAaF6=Xq3P&11h?l7mq(RB zsiXcFhh@Io9!=cyjShJ)FR!ss%JKX-Zr$p&Sk{W{hStnl-#X^!c|+bAj*JPpen0BX z?B8p@el0(Z7kEAsD%u_$Ro=)j$SM~!A`X!XW|%-85XIMc`D}l=bYhQ5I|pM>;1?p$ zk7U3H73e}SZtlL3kyn#PIH1dDxddt;`djdp*F1y;lB?aa&V<1zU$XZ&z+jS4B>!(6 zR=a8KDH_XeIm@rTpkr^{fF68jjGdwwc)MO?`bmJz)`%Dy_;Z^wGJMmD&bZ z)Ap~ZHTo-3N8a3;dpEwaOgDD+=xNyJ8JEXW=^DAprCixw>0i5qQZS&nCclRmQK1HH z8!ofW7cg67YOcB9ve?Gf*7PJ4ac;jKBbb7rT4)nV!MF$-eBkO>@r2~z`}bjDv-ayK zQR|-6`3J+_y-shPo;@S>RHh~qN>RQ`s)MV;PF*t07JE^^3RKQO#h=>x{i0MOxOAJ< zNmtDpT3bf}9Uc%6@Yl_sARQ>L5m@(8ZFaSS4wkH+)Yh?sc7}I0Xz1zv7xQvagPUJL z1~5#Mn=IzT&xgJk(raU|-l<+|m-*qYKU+sI_^@oF_?gAQg1k!vm?q7xGUY?#wpQWO zs@--kWO9S<-mUNM%AG*Tvr~sQHP*x|vRuBx*=Gi?Mll#zFDva_?( z(}O?MVWQ=46LZAsKJ|)Y^^U?cXM-dcjW9-q|oS$yD)$X zfvZ1%k&9rmckiaJLAR*!$V_9f2%{`QOiT>HSj=zh;P7kD*WnG2^6=Pe*=F@JDIcDO z15fqv-o1Oy&Lz8+*Ew;>6Ylg*dPq2gz?N8><7sCH`2m2sT|HQkD|0sU8zmDW}Q=79BjMV=NGS>TYv_74t5GnZe^6jWL_A6zT3yxd7s-{}{wo9_GBPrHdV1gn9Yx{3-fe*_xrBRnB6W~` zif(eo1)%?(3c&^*wyXQ`LhV|>p{2vEL6X8v3ygz$rP`Ogb5t>Tf5O z@%zw@z6cw%Tft&SE1lB@?Wh9u!=7Cg0iAelV`1M9n`(Ik1dNtDwRkB&F6cG9B~ZAs zxuD_S*2$MCld1^h*GoU0f{FVXv#Jb9lwHnrw>G zTp+`eof zcCfu-9tBDsrKP1Fo;UHOoSoMeqnjM~Dbx%NQ?v>!ok6tNnQQ>7t*uo8Zxku*dm=rH z^r|zeh!PA2mj%F2y6<$W9C60AgI*(VAD=Yu#k{R)>d)t%rc~ug?2m(7+FOXevt!DF zpygh}QG4OCo!?RyhYebf|E?*2QcJ^hzf%D4>C_Z#t-Dn}e>7dYR=;m%CX^aq*KQec zzw+e%(2yPQbI^am3=XrJI-ph2hXG$oE62P4S+h22+X1ccD!6NDA)!14&Nj0|3lqFd zxnQt*a&@5gG7SxPq9WAiH=?r(ye?B5c5B!;;Gp+1L$J}3~ez3ceO3GXGEy)BbEafWzt@(7%MJ- zl2*+mh90;-Hh-mvXY^tntp@-{*$=I)nUj8iFiJ*3$PGF=mftmwmRpZ9dVKfz^a$ML z%1!Y}I)$_Gy6v;mM_b$2gw2TrA(;*<~?uBTF8z|){2R_$`qGZ zQo^;eUu70A>}eJ70%idoL!Ob!OnIDVTqYEu94&;xIXBWkj)*N$~#wGoU$H literal 0 HcmV?d00001 diff --git a/images/scan.png b/images/scan.png new file mode 100644 index 0000000000000000000000000000000000000000..bef60b23051188aee6ad5cc4dda4adac21ae7d13 GIT binary patch literal 16103 zcmd73byU^S*Ee`YEG#-CrIAvQPDx4WF6jp8Mx-tvAT2G8q;!MQja=e#Ns;dEdd~%a z&-2VXGi&~sHEYdvdE?9J^VxfU&ffb3$;pa8Mtg<^fj}NhycJP^Kv3i$kXwcKQNR-w z!>PC6G4E+;A2!sqGA@WAaC23>I-IfrZ zOmj*~;t@=ZfV9O?YC2&Bi_l3LDwc|WGT=EBl;wTWQ5XNiPqMy%$GK8La4Ii4dolbw zA#O<4R6?z@g<5T0oYqWT9oA>+J50`-1-|Q?LDe7hHNVhMzjtR?!}$a?qEEjcEEt8I zO7`3Hic=}$jv)4Zhq;rB^AXceIZp}68aZNfkvL)gwIEg4IC~nG>V*9`+CVDh1iHV6V2U=TOOAI=o(2b+Bi!OIN6$1$5<0DJQ$2J}w zNlUJkQ!rRFyEL`pOMK~?dl1Msy{yInaS@$j4)ps;{GJ+x6#hbKavf}NAo4rY>RngK6?moLe-W?j8N zU(k9TW(;+P*m*v#JZ1<)P|TsL>fY+w+Cx;7U14_C=dS5OyI_Gneu^RAMlRb^6^&bv zkEI`f-v0mLyPk9neCr6jCM+wbr*$ejtgFf%yXFk7V59m#iJkd>fvzFNh`Cl`#H0$<~EKP>1ojyihuj!)&C52U`rb9ketej|5jB6UvVg{OrF><=F!y97z^k!M7!&Y8zP$V z0tF)Y9n%*+7SMR`5tO$09Xzvzhx%*Q7Jcx~pKtBTD3FhK&!-!k+4l>RHAv|Tai?qS zm&Q7=^G8-4xEsKZb`{pzaxzpMR-&UJkrF#{2l7#&)csU(bo*4l!P>x%#TT>JxC~(_ zyA*GBM~9OrwoDS;FE$o8WvQd1qfmwZVUxot>rgQmc=T;L4+BpFe`s#r=1wtx{=uw6 zBqi1Qt_yEQM#io%SeFC0z`#I+u&c%=UitTWdJsH-6umlq)#-7Utr@I{*_}BY<|7Nw zYHS|>Wc%Fa>^@0iAv$y3$%H+IHIr-a%Ln0yp zlAe2wL_`?K+OFs}(`-A2xxK$IFkC=m_h7vZ1TGta_Vyw-b$A!F;H3f_;v#=;K?M7) z3WC!h6Cw|vg7uGe>1e7kc9AO=Hp_iYy~5VYgSrh({mXg^nDG;@^F$%Cw#? z%mkL}p+w`?x3m9l2HiwbG4@F~`yyv@(XUnTuZvA6>yh!FiglWX=W&5z$OboC#S6;k zt%aHwG3-5fArM-VU1$uE?oW#A_WIlN@Ed=z+w+P=0Y|QX|TJ z&|&ADHa|b!*IW0IOI`E)9oN8xAL`xfALzPD45Y%ZgF3(370+yeR~5ptp`pPtykm}l zn3#$@Y%U-m0NT0Yfc8mH%)+3#>0&mEdkTTrK1&B~;$mW! z?1^>*%gebF8?i^#T1+WR?^5~uQa&T6!Psi^pnNEDjxEE8uJ$#+qD4zrdq;=6n_ILy zH8ph@F;9Dnw1`)eZjU68lMg*xeyG%ramFZb(8a{X)#Hp+RpBc`IYGHuS^gKphAek{ z--NWKYcm45&y`?3LScSKp+ejDlcLR^yKX8)-JJ;=DI$-&Fd&fN1pdlKcc!0}x1B>_ z^O?c^K(Ce|lKNSP(0Ie<<_rG1^5Z|kjF`-O7}#QG@h*lNY#JR#)3zuS`gW#O^NAOm z?#NpUjZZ+3QBg$ZkIruUd>k=f_-{p``rV?d1t2AvQsP0~Ji+}(gh}zd z4}JlCMFWfs8Nl41H{>ZzdOZb7=d-%cxKjpH*%Xl5_|d#|Y<%WisxsJk#Cc$a#Pyi! zWvE^jFlE2L;Goz<6mzkE^5J9sjG=o+&Bsv5Y|8BTj zh*gpH+sGin^bKdlT22E43VmU|pWSd$_y`3^OgW~_bjA7rJYnz}(59jcAE{m&A#w6s z(gSMT<8wahcU>97%u@nwhKw9Wxa$~ae~mU%axW>eA)UP3E*qn3G(8JCGFfyNLh%JV z43UBUvaqDsQtT%{PRUg8Gg8`Ew?cwnAzPPAbuo$5#~_FC!P4_Bx}6=N*S+|=>=i+% zDPP_cdF>fwh28uQovT2O={~Zl;3J#Ml}>0P&}FR+8gO=Gzn-ko-(KUrBHL5e2S_OA zVVM+_h6<_O2n{5rZ3}H!1)X*Krm}>FZsej3CdSI+)8tqWRhlQrD(09A4;~mdqK|%# zy>3t@l6Rs0i&z0Va+!vyW)|vgjsM(?{T%~&ziesWr)MQXPtkarW^Ae^w3*`Ua(Wg@ zCpL77f$(+&Z#QmjL?X z$qokn9j9C2u=)TXb$8zAA*e0P%-$1HN73Ow%F|^7Mc7N4>m)ug6#gV6AaK1nfL9a& z)`!hzc1aEN9ftDAinRNlRw8A4Ab-o&(Zf%_XJ!USo;-aTDD<5C+F1ERn`eJaj!y{D zV(T~Ehd`*lTc3JjVPF*UmmY<5bxjft14-{&bVxbb{#mBRU|p4Hb#!o$%uegii zv$OLdf22FWv~*rEUlwfrXLjq0aC{65KG=uP6IILu6%vu;+q@(|Q_xP35E4cW$*%0L z3|Y#eU_5^G$W%iV%m8ht4yA&q(?&)CCJ`CE#SPFs7&VGJ>b*})PTr7q>V5Hm;KiPH zhKQQk9SB5;Y$2(*%^ZI%#5gv)oE=H7g+o2!jP343?%6fPMhKNu;n}+ z^r7hdvo4Qxz`6 zu{2yuzBJ&R1cixbZ2e!Pg(6Ej#RA>YS1Vp>Lxx1?q7G-4kZp456 z%IUO@@6Lj8a^cly0mR~tzJc;ngyU!HC zjxw-za2Ms)NghB>G)-H=py*J2#=XZH5{pM?fHqB8SJl(tKPnxhpS4x9-mrRpZH;=( zA3z}f21_QSqv#le1!L8`T=6sAU;o=0v76fK=?PA^?42JUSC5z3e73>q=qA2O!$6Xuh7}DCQ&Bn$?K{B$f_pDfvG%tX-bYU%_L^L_0 z11^w%s(QsXGlB@X=4#tygHkqP%#CkE?p`9$CZ3itZ)ZGI#G5|dUx_;u*o&-GJpBO! z3uEaEXjjaG2N4I$1LEyV!cwH?yKaYBR*#X8ngG;6I`nx0T;P(t7K0+)t%&^$R(VZW z8~`hV!H6*AMMI~1O?3ZMgg5FD-b-SWkJJ@!xz3D382 zE@x;HzpIG58(-H5>F3UczYga6&H;rAcj57c9Z{} zD%J~>1r2!hE%(fE{8!N^mtq0dvqfSI?fB|n?2UF>{(~tif=B_=_{$+-Dv)F(PcOB` z;l(2?;?`h&6~TuXW#GH+#1|iH%W`^6b0%`*nI2rAkuDe$W8*@>!b)4Y(oyOuQgTUT zAKfuEh=Dpd%G8RJcu7?kc3s5Z3*YQ3&D}@Zl|Qq8DO5Ddl6~UhE~T%fnw#=KtYAJ% z;D+%%!4dzjAKP-Usc+@v=orew%QEg|bT4!-;{A}?A~2X^OXJW4Fx|#~2Jb0VUX`Em zfBnKCO^=^?RL^^8a;;7(fufg~B@_{H6OMJ}8ZhvtMh->KVn-7t{!l?whFx@_`{?!$~IY5s9_AGuO!()F%!Q+|H=q+b2( z+)pZ0RS3!oRm6~LWLpphcy`2spI`lv38wt&*MqYQ@M2Fatg4FJ`0&3S%>{#k!qvL; zLt})N2Qs4>d5huhhwJKzebieY{-H7bolT@97jatHL-JA_ISYwbpUdB~c%0otT^$~QRf_Zr@xy)26x9+pg15+17$hR~ zyvF5>1w(nv%k)obU%{8FeH(9fPYMx{;)|7_mVx;)fv9Umk%=AxElbQ$+P5MoA|Os_ z=z$fLc8>v>&KqG=ycQ$M{yV@n!tLU5>`taPbdE?}ht=N8lrmKx|NYIx56DE}ZNorW>Gk^b+ zj|XFxs4oit75a3XFi180Peq_N(aZJt^h(nHy!q?L$nr|Ez8OluboUw0SZUKJD2@KQsO5YEeEUMP$I&h6|FOAq7bg zHlb&nBk3;_MEvJh^@v|5O0%RpxrgoP4k-ZO9@qj>pZYyOUn@)irMoPy(VP7_XS+s(O5*muOA$UT@{%-z>RY zKa#ER&n;@p?_X=7ZwtsgBW`pbPs>~fD^m{bDJxyyPkW?5mnD@=Y#Wc^}oy&5bh*xCE23ae$Yju#KrfVofUAac?u^J3{omKXkQbT%0FCIX{&h zRvW;<$=NBE?cnSTNU|$033;TZ<0rDt!Uje^q35qrZ$Wltt4d1OqOSTAczzug7t6{T zYYmGxjayDv@W~6ri*g}eCibluqGu7fd?NetJV6BeNnoEb>%bnEWnJCG*3mtbTlph- z`pIDXc?N@;LaYnWBN~Hd?Eg+6un$JPFQcu9yjSR{V9oB}EO2&q*3sUM+=CwT3XMXQ zoFTKSiJ;C|TT@MOKy-qFRkwok!89N7hP3^4s>5=6^7yft94YcB>Tw_o|84EEzG4|1 zxs>S4fAAHJZHNTvJEXPL)qWLN@&t^MjopuQ1_6xUM~-p^FnE5iR* z>Qu>UIw)P~^FJWEuCi}gpF0H{n6M|T1?S$_HY!;}JHGMHldq z(1~tKbgs}k?BkJ-F7oz>vcgln8IELpx~mB%OgNyMp{H(VB0eU|k(`B109M-(ZFOMx z5Xh)pDZuF5FV#|Jxy=DyrR^$r86@wamDguD?7He;2PngF{MVQ=5M|&f8MgTEM^X48 zUay;xrQ{=ktwWB5vWVNtu}-y>AB-}Ur|Hv$<=G6J;Y`M;U6buUh-HWo_m z6`}r9}SN=&kWGxs3J@M*g2fHcCp$`|BepH{{_)#3F{Esm7vvto8OoEwqd$NdB zGjlJILj67DVMGqC+;m$0!NH9_aBY~Loqb=q;ZzrtTU^YHeji=_-Me?r&Q)RuNY(?( zbybzTh>te@15EVn?5vN^t&q0nLl+=Jc6C!;P2fce+WaEEv#7m9|AKu>^82+KsCU^( z2R`4SFsKK=uBs{?@Pz{(XoF6%3J$QCj9}{nB*Ao*G>~mSWF~rY=FwPE@{0Vg>t$~| zE7J9jc*hm?&|hd=890zCfqRnJLsYfryRv1_fr-K?e0%<(qihaq*`XFKwh$8vBsYJt zKP%b(7J1}8PAUgcsEll$^6u6J%G$tuf^%=*OiE%?p-R^(OJbkhZA%NYJ1Jd;n5aHL zJq_4N$uhq|hjFQ$0%!im*P=z20_5O?PIO9f`CIWttj9vLXywkxT&(BW;deE=#!Sb@ z;d7kqWKqb8nV8TZbwD`u+{>E`9fU2Oje^lCVB|0dRWV~>5WILS?xw_0vyW7yxH@1_ z2bO>y6&W5HGAIENkC34iLwDdmuXF@!qLhy1-N8QG;3iJR!5<}Y^o)csH~sbN*O^X6 z5D1wz@Pd9g#f&)Z;~_ttPi1#MctYYSJr%ikpS})m&wR~Nibf0f5NrsT27b8!y*TE6t`4x&NsnGEYd6J6*4sJ;lmQ z?6RXvBwwEGn{iacP~t^v{K)AjD> z)f#=?JAJv0Ltk<0$A)Es^IM+ySIE%V$Frc4&=8wermJW^mh~#5Nl=##N#D_e z@F$KJblXty#wc!IS+>~=bQVx%YNH@eU)Z1L%iUL)C2Tssfk!Om$l@&*W+uGn2|KMv ztn?4e;$Z9=E+pX~G~>=oa`>aTc)+@f9t6<@1HL$oPl){q4Gnd4v`XQ`wF4#e^z14k9#YsboV_S==L<=LQ@wwZ zJl`mfQb8(%prD}5P3r~OGDI&sCsI;ZRUbpX&6kwddB*dxkgU8JH=`+hL%-CY%;P9o z!P5==lky7V$;XX5+ZORLe&tf)8a{w~1c1YOU|>+^(%u8$;kL_{MMtfvsPdPi^$Q?_ zKr+G$x6YTY)YwS%p89)U?OGqaNl5DbmK*MMaAG5n^nOCuE$?85;vkA3MlrJ&1nUSy z%@Em_xv?&c-?&>7~9_EuG;QXzr zX5QEj2rj;`3AjDeSwNdP<@6r7jXx4eNY}+(!hQ-Qs>{Evd!)xG0}cv;;|s(ZKrR!a z7H!0My`LA}7kOJ({|3ZS1U28*q4?mEKt2X+O4S7?bjO_?)P));0SfTTF;H z%gJAOQFoPG^x@sz1rZK?0aAjiQ7xS5+&(3xL^fu))>!VInY9~!X?54Es{F_4qIb=4okbO*r8Xb|e!1U@ zb&sV^h9nai0`2VfG=o>?l%CD3nAdFJ?zRoD3!FFM;rinoZUZ$YaB>M>eP*syWZ{a` zTZXbRIa&DSiIu;*0N&R}s!dgq`^#! z);{pVYdbQ%M4vwZJ4B{E7-A#B+|X6Y`nJxGO6ibz`eHnlN%kjTg(U9CT!56^qC%yn z^e<@0(u{~q;#fI4R3r}w{DPaEG3QN~=oZNLdV4^netT&p2l-cY!R7z;bp8oLD zWysEWjk)Z*pB}cD;eGhIyrM1;na&o|B_UhqlMt%H{kK|Y(V1)@Tj+>}NlGE1e0y@HN@+pyxEim&pd0yhN zHo5UsR%TB&?HxZTeA>{!OpM2@Gw+&W1;SaCzdisaqJZ>NYJ`ZUt4;@d^1dj%zssL& zR=hLTG;TeN5i944Plua$_J^aY!|S=nZjfD5V|wYr$lmSh~PF;^yg)HuO6>y@NwBw)g+uQ@rF_fF;mEsceCML-z78T+9Y)N7mLfjl@o z81NpW5wK|05lH1yY9=N(fZeO|=Bg~I&Yf(U8|)ZwI_ZmD(s`+*x|DAXxGgZ%-^Wb0 zIy?k$mr!;|v>knCzbhg{h~Gbmp|s@I?&NshroStj{OkArKI_KJvGH6Q^HPa5i}d4L zA7Rl%j!McV=ewkYPnxpx^Lnb)7}CrN0g5W6tcR( zvggU1o^*{;p}7>G==Qf2hl8`m^0g5gHolAU#`Hg%dx4-REP{OXQA|6tchC0xcLt<{ zpkd&&JJf!TflJ+iWJxAH6k%?Rz<0c-`-*5~aKsJ~&Fe|=+R=#H*7Y>xyLx2~Z=76E z??V7u_fr!A&8JfwH9u-=YAy(i%e^kzf=^F9Mi^y)3ZS+At3|eW_F=v3Xs&Mm?p&IU z5tbux#wy0ycsf}buXgXln)7zWjjH`5(`0gbkVm&*$v0s@dLGb;I8g(pYHwPg_Zck+2{eY>5=NKZR<;XBUxa0`ooWgf>CJ<{*(FswufbS(^6jzB3j8KVSTg6o4r#b2oCHeBq@UUM!c3YHJ-4P15AiVX z>R`&Z`1BvzG%Eb3S>B2C;$41_s)1E(!;@>lmkZwWt;f=PfoBek^75Ux_gvMlWwBtC zwP)rjoWH^Ht#Ov;R~AXe2xO`i1TKEsYP9cI(>`5yejt!%W?^wTzQ;Jgr#qzbNlK4m z8$$M!WL&l8dtU!;6Z^Gw#lpyE&lc++rKo&6DI&leHsHI%db7$3QR7i;R25fN0+7=BPN)8B`#S@T&n!u>068`ZG=d@b!=i{I1>F-dxlU zRlj^LYCbzYf^eQ`*n5YEWsJA{D4<<9P(#P1WM#!9*x}O~V_$h?YZy5dNxaG|n0%W`3K%Vfrr<9M?eZft_(rF3+R z#mMa8xwJEv^=j3wu6zISElA$rw_=uBjKoCsuMKRZ=M692Exb;}Ko1vh-m$CTA9{Un z+g+F<4jk{Ca&1?^cVJuYj9mlug#itO+ry^Aev*r<&}bKJ+R%R z@z*4Qoj-Uy6-^YHJ+fN$%?w7kp3zi?qk=y2W8Xt?-IBollRd;u)?TEfZRvMiqR66!)fFvDO5DJf^#QihU%ao+@^iKE|*-2m?h%{}0abREGQF|fhw)m&{L zuCuE02Xxm)nPN!)eq1dkb`jYJWt6X#!NKgEtS-6l`3nvskCiHfRjU+LYkXfgFRv!# zmddIQeam0yRLjVhBD(xhi5Z`ql6oRk&B`|*mXN8{A@#ksvefY@U~A=N4JF{q@Tgp+ zuq=t;!QqDbmisT=D|7J8k95Ojza&}30_-uvKpO2zizeDPvU%<nCkz&| zD^CwQdW|Y<7b-0on5Iv1Ldv!p>ufj7o5S;ZW%>sAC(bLDR!O|cnH}l5xh32>W8Qyk zKKflwMkiK#Y*JNJ>+|C8P`E<{HA_3QBeF|bQt#upQs-~oe2c72&ePI_RP(DbOhp}c zt3UHnsyp6RR$3!5OAAiOO_MEH>{Me^W>avym@?8X7-x89o*7I$7&->iKd%Nyg$4ie zg*LVYN^7G4;GU9txDz5K6_QVv<4!-qHe9|ro8Zh=xxNVA*|+|7n|E~wM(URw$eCKP z`uPyI;rm6GZfuU9(QQ4SJrr6*-=%6>n6KgFJv(;C`E zO0t+<{-d)4mcFpgS2Xa;@4mVH@{haNBS8IR^;iwKAN8gBPQfkDhOn$!chR3w2SA zDaXtn6FZ-U5ieBpgG{2G#PsrlWA8ArOq@0uHjdHEisM{f8Tak^^4}Y#>3Q7R%E{K^ zSRis~&P7iC%1M{cAmS`w%BDavKiEwyHpV^|DO%IscIi7$e)_!$-|a%wT3ghf*7d-q zD)O@0Mowz7z;-_#1Ro2iiJTlvN00tO~|!%@@9W0q!H`VqbZi-*Eei9o+(! z(NSf3uadk{jgt_b?m`om%F0TT1Or#xcoWa?G$`4RQB9#4gVWDEPD<={F+{3UlUj&Q z<%GVN&`|Kp$U2WDanR;FE~?IKqzY;<9lt`%Eqm{DO0U<&Xh(NO{z{+Y84zlo>!%^a zB60FM-5liAbTrfwpE5Es7FdgoH?M-_rezRtus-lrf~$; z$RhXql6lWdKbEqRYJ5#iEDmyvH)QFZ=k@qOt)ruATl$nl0B_)=;NTh$9X0mdEjD!G z^b|Fsf~!qUHb)k~42$NgUzF8!PbzhNFsk@wM1pfl+c!4M1I}WGEN3`qYSU4cMrM4a zBv-OO%57&Lp?a; zB{3y3S>Li5lLw26N<>l$XQ?3sMF=BuDI;?8KNGA$p`ptms}}mT<`R)(EswRWqVx1i z##Zh8pre`N_yioI8SArl4uNk^D@NYt8L{b$yh+!CT`G*478x*>)t)AZSC504q~set zP+iR69W^#Fu*fG235hs48CzX-qs7)#2U92xT-49Eu#6UO4=uEu>?v!P49g!!Xc@8F zv=hRiT(zs+2FCuA`1t@KF<8+n`fbp9KD zT#KO}23F`L0oeJ5D)6jejE7~l0jps6clW~FtnW=2PlcZpX_H{TjZ_|n6XxX~;Kalb ziHiWNo_0$LJ67yOxRL9}^#L&dIFQUs)STBHtkwVa{n2fZS$@7em|ndDwRHB-AYov472ZbyO8zU%q49pZiE%_eXmeYeTtU*>x+@AkZkHYd@dt42Z*Q` zuM`f&L4I3j&-b2=kxe$`#t^kT^Q30Zav%c765pT1MFID@i02*X;TefJX99_Zmcp*h z%JbKG*g?=aPA50}ZS~gaDu=6hX)oRH^e0g?_fkc>qG~EqrnRBSs|q;xQKNfel6jnq za&kI0oIt+(uU~H%pNzxdaX$n#z|gsp`-_U0PSR$!XvqJQ3ExhV*Ryprv2mD^LgES% ze@hUS)>U;mIZNj!dwHZ0;3~luqRhmKU9%ZDT_0@p+wRsuk!knf9z@;Lvn*nF`|h>8 zGCrb?5?^yX-fm}k8=K-Yl9VMLZtil<+$n3-#&0CyfzYux;ARDUP5A=awUrKrbyA4T z*%%3bE3!0@YT_N;WeCdEsT{{S9 zc^{fSwc$h`*Dj+*U6qw3dO$(l6avBnL0B}S?6jK{1 z;A*$6Zdjp#Af;*&UEoWT>ojCB`|ulO113$D`T^uEPZdJZ?wx%qke?A{8O@?|9-sdR zU&BHDWnN)EobB-igv3ULv&=XuK6^69)=ibKiOZOIc<|OJ7XtqiH$Q*>r9xm!L}FD@ zmto{qYuAUQnJxHcH4upPGj%3`5D)&D{Nl#EmFJk0-n;XE$~xzEItQ7Y&CK^nH2_ZV z2?z$u52Ex;O`(p1oXP!@L>~+d=RuNa!;W$(t4aTllym@+Q0|OKkjONCfS&_Wo;~?> zneb{JqoS}UhAy2%L_|yy?aX&Yfx7|#1lCm+34F-w6en6<>S^x8m0V||JvUvO0!RPl zYrU(?CxOD33Asgo@AIBatsk9LtaUP>dwP0;yx5K_ZIM=fqKf5$cYlt~?z~wBS?CiR zK7x8KE?>2CJL3&H!WObq1rhg`*Vi8fws3LM+VA`oUGV2CtkEjU5Dz~*JWS!T4_I<_ zs*$o#$yaXF01G09__O1J;`4EmIXQ8F+zEwkqg^`oyerCgfIMd?ev(SBLc=u${#Vi# z1C?Y=#5gqw_|k^m$RxZ9&xyauijH;pFngF@}|NtTmjd-VtkB zbZTk}xGe#W_c>q}!h0*z6%vK-YW*6$wFTJxtLPlAc;^hQe?H_F@IP*fj)sip6P(hv&(qRwjM*08zz1YE}K?ez~ ziDX%m#jj+ z$84?Ut~C2U+;yiWeiBHr;B;u`p&#;>&5@HRDk>_HQU>H@cvmjKDqnJOoxr^>;Ffd4 z-xZ&FdcC?%QSN1AxeiFDT0MRAD2c~8Z0Q6UQeItIS=rp&TwCLD-OU6!oo5#yjTPJ~ ziu8sr*wLlNYblYTlaZ$^4J2_1ivWqRo4vkg(Je~n-72X_;tvJ6s};PD{bf!{&#Ihxfp@J)l6j?AuoyQn($1FS>w0f}3I{1IA@k=Z@fh8?hzSVfH{Y?dvuAtQP+?PD#T|mwn+Bajb>R2~Gwzt< z^1iHR*{huV7!bA8Li8@<8M6f{6~dpa=-DdkTS7Ipio6 z2wq6qeah>$zdSsQea0~7I=yyK+7t&pJv}}5YC4};wyTLwjR7|Tl3&-*Ee2 zcM{K^YNMz36WF4D?gjM&7hc{06Aw%fngu@8=Y_d@0hHu7PT%&H;vL=rS?sfIa&iE8 zmUw9A+JUUJbSF*Uf}Nv-L#H=L^`asV?}$or>N0URHGSJ30NmNGi8ztA{fPeP1v!{I zps+MK{gpb3mC3()5*92@y9T@u!>^XEJ~&%5&0%7$|Eb2iT-ViXIy(LR`wI~lxN?Jz zn%L*G*h`>t)!*MQNcLs>v0BiDHp2w;t$M;aQH>uk*Ks{r=Xt+$8evjAFJ>)VQm^0` z6|gWUJ1(A0wRdmjZ686QJYpWF&zl$bO1)R^&ARF!h*RaFJLcXKFYnUeVSu{DKCMLPEmsiO23qjoU%qd2d))7(n2Oe(&(`@L+~`j3^ZmdCe1+lqJNK ztEy_;t})GVL>>IJE-5BfY+-Yi$D~{9>-#4jn!15eQXt`6ty$C4(2z3Grq{o~@OJUW ze9vjHWQF7q!J`dkZ;CTJ!2b4pNzebhiR%^@7r%G+?xeMI`wTat%JC#o+c?FoU%%j` zp`s8)ZQX!v0jr^soN zY);E;=ORz>>>qkVxsAsXBAFccPaB1f0LoOR2M1rVu>so1&CLy?^KBTKf7gy=07E@{ z=f3@iPDU!S^7Yy8uDYB*D=QORxv@0-+U|9BD`oBgSRVZ`shtF}&iNDjV*5B2g;Ywl z6Y6v?aMyg4h&w(!NY)1Ws$eM;%7FW@<3mBn%HOt-t|sdeHIO_dbp@1vp=lnpQKmCRvA46XEDQv7r|W}BOZm(sm|$UBfK;N5tg=$0bj%}?~C zZ2ZML|6OJUebqtPB+j(+W$$H#?Dn@Mo2n$#y!!0uRU_pEwhFH;$F(CC8?wAM{gV5i zeqR0r=&PZdoR}cIkBaH$Yy^_;&wE5adB~Qh=$(1 zdv{FsP$t-O#sg@*n}j<|eqGFfTuMBnGQ|AD12YvdS1lFU=B!H;aDCXKY65Txpr=XS zOVY?D;S&Az34SdU+Sae*87_UGO?pyi04_YQ5g z>}pWo;1d%Y<#5dITKVYta;j|s)fy|%6iri@pPvs{YGZBoye;SH*;$NgXC>zZ9f)Vr z4{)tnzEhX&)&)`pP8;1JPA?j>f$GL>vjF;S5JWfGO&8MMRU$0c%lthTX~kcZlt)QL z)p>)&NAJt??d_KxA1rCsNP)mSefCTR<)tL6<}Q8>3zzkSsC&@GI`VJS^#xs#_rAmh xMK%N=Ndz(<0Nlz(=8t@1Kt{3u-@Tt-ZSVCzeBe3?PIeH8gs7}Yk&xb}{{=*h7{dSn literal 0 HcmV?d00001 diff --git a/src/main.cpp b/src/main.cpp index ec0c654..15370f3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -45,7 +45,7 @@ void cudaRecordEndAndPrint() int main(int argc, char* argv[]) { - const int ppp = 23; + const int ppp = 20; const int SIZE = 1 << ppp; const int NPOT = SIZE - 3; //int a[SIZE], b[SIZE], c[SIZE]; @@ -104,7 +104,7 @@ int main(int argc, char* argv[]) { of<<","<<((float)t2-(float)t1); printArray(SIZE, b, true); - + zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); t1 = clock(); @@ -114,7 +114,7 @@ int main(int argc, char* argv[]) { of<<","<<((float)t2-(float)t1); printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - + zeroArray(SIZE, c); printDesc("naive scan, power-of-two"); cudaEventRecord(beginEvent,0); @@ -146,7 +146,7 @@ int main(int argc, char* argv[]) { cudaRecordEndAndPrint(); //printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - + zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); cudaEventRecord(beginEvent,0); @@ -162,7 +162,7 @@ int main(int argc, char* argv[]) { cudaRecordEndAndPrint(); //printArray(NPOT, c, true); printCmpResult(NPOT, b, c); - + printf("\n"); printf("*****************************\n"); printf("** STREAM COMPACTION TESTS **\n"); @@ -223,7 +223,7 @@ int main(int argc, char* argv[]) { cudaRecordEndAndPrint(); //printArray(count, c, true); printCmpLenResult(count, expectedNPOT, b, c); - + #endif @@ -237,7 +237,7 @@ int main(int argc, char* argv[]) { printf("** SIMPLE RADIX SORT TESTS **\n"); printf("*****************************\n"); - genArray(SIZE - 1, a, 127); // Leave a 0 at the end to test that edge case + genArray(SIZE - 1, a, SIZE-1); // Leave a 0 at the end to test that edge case a[SIZE-1] = 0; printArray(SIZE, a, true); @@ -254,7 +254,7 @@ int main(int argc, char* argv[]) { zeroArray(SIZE, c); printDesc("radix sort, power-of-two"); cudaEventRecord(beginEvent,0); - StreamCompaction::Efficient::radixSortLauncher(SIZE,c,a,6,0); + StreamCompaction::Efficient::radixSortLauncher(SIZE,c,a,ppp,0); cudaRecordEndAndPrint(); printArray(SIZE, c, true); printCmpResult(SIZE, b, c);