From 2446f3cd0fb04c8667e089ec1655d58690b5de71 Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 11:13:25 -0400 Subject: [PATCH 01/10] in progress --- stream_compaction/cpu.cu | 46 ++++++++++++++++++++++++++++++++++++-- stream_compaction/naive.cu | 36 ++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 3 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 719fa11..b0fd02d 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -20,6 +20,10 @@ namespace StreamCompaction { void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO + odata[0] = 0; + for (int i = 1; i < n; i++) { + odata[i] = odata[i - 1] + idata[i - 1]; + } timer().endCpuTimer(); } @@ -31,8 +35,15 @@ namespace StreamCompaction { int compactWithoutScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO + int ctr = 0; + for (int i = 0; i < n; i++) { + if (idata[i] == 1) { + odata[ctr] = idata[i]; + ctr++; + } + } timer().endCpuTimer(); - return -1; + return ctr; } /** @@ -43,8 +54,39 @@ namespace StreamCompaction { int compactWithScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); // TODO + + int ctr = 0; + int* marker = new int[n]; + int* scan_res = new int[n]; + + for (int i = 0; i < n; i++) { + scan_res[i] = 0; + marker[i] = 1; + } + + for (int i = 0; i < n; i++) { + if (idata[i] != 1) { + marker[i] = 0; + } + } + + for (int i = 1; i < n; i++) { + scan_res[i] = marker[i-1] + scan_res[i-1]; + } + + for (int i = 0; i < n; i++) { + if (marker[i] == 1) { + odata[scan_res[i]] = idata[i]; + ctr++; + } + } + + delete[] scan_res; + delete[] marker; + timer().endCpuTimer(); - return -1; + + return ctr; } } } diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 4308876..afb336a 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -2,7 +2,8 @@ #include #include "common.h" #include "naive.h" - +#include +#include namespace StreamCompaction { namespace Naive { using StreamCompaction::Common::PerformanceTimer; @@ -12,13 +13,46 @@ namespace StreamCompaction { return timer; } // TODO: __global__ + __global__ void kernScan(int n, int bar, int *in, int *out) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= n) return; + if (k >= bar) { + out[k] = in[k - bar] + in[k]; + } + return; + } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { timer().startGpuTimer(); // TODO + int* in; + cudaMalloc((void**)&in, n * sizeof(int)); + int* out; + cudaMalloc((void**)&out, n * sizeof(int)); + cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + + for (int d = 1; d <= ilog2ceil(n); d++) { + kernScan <<<1, n>>>(n, pow(2, d-1), in, out); + std::swap(in, out); + } + + cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); + for (int i = n - 1; i > 0; i--) { + odata[i] = odata[i - 1]; + } + odata[0] = 0; + //std::cout << in[0] << std::endl; + //for (int i = 0; i < n; i++) { + //odata[i] = in[i]; + // std::cout << in[i] << std::endl; + //} + + cudaFree(in); + cudaFree(out); + timer().endGpuTimer(); } } From 2b95dc644bb19466290fd815e3648f8eb2fb655c Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 19:29:32 -0400 Subject: [PATCH 02/10] in progress --- src/main.cpp | 2 +- stream_compaction/cpu.cu | 8 +-- stream_compaction/efficient.cu | 90 +++++++++++++++++++++++++++++++++- stream_compaction/naive.cu | 49 ++++++++++++------ 4 files changed, 126 insertions(+), 23 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 896ac2b..bbc114b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8; // feel free to change the size of array +const int SIZE = 1 << 8;//1000000;//1 << 8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index b0fd02d..7f06030 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -37,7 +37,7 @@ namespace StreamCompaction { // TODO int ctr = 0; for (int i = 0; i < n; i++) { - if (idata[i] == 1) { + if (idata[i] != 0) { odata[ctr] = idata[i]; ctr++; } @@ -61,12 +61,12 @@ namespace StreamCompaction { for (int i = 0; i < n; i++) { scan_res[i] = 0; - marker[i] = 1; + marker[i] = 0; } for (int i = 0; i < n; i++) { - if (idata[i] != 1) { - marker[i] = 0; + if (idata[i] != 0) { + marker[i] = 1; } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 2db346e..68e8a25 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -2,7 +2,7 @@ #include #include "common.h" #include "efficient.h" - +#include namespace StreamCompaction { namespace Efficient { using StreamCompaction::Common::PerformanceTimer; @@ -15,10 +15,96 @@ namespace StreamCompaction { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ + __global__ void kernScan1(int n, int pow_d, int pow_d_1, int* in) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + k = k * pow_d_1; + if (k >= n) { + return; + } + + in[k + pow_d_1 - 1] += in[k + pow_d - 1]; + return; + } + + __global__ void kernScan2(int n, int pow_d, int pow_d_1, int* in) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + k = k * pow_d_1; + if (k >= n) { + return; + } + + int t = in[k + pow_d - 1]; + in[k + pow_d - 1] = in[k + pow_d_1 - 1]; + in[k + pow_d_1 - 1] += t; + return; + } + + __global__ void kernPadZero(int idx, int roundup, int* in) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= idx && k < roundup) { + in[k] = 0; + } + return; + } + + __global__ void kernShift(int n, int* in, int* out) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= n) { + return; + } + if (k == 0) { + out[k] = 0; + } + else { + out[k] = in[k - 1]; + } + } + void scan(int n, int *odata, const int *idata) { + int blockSize = 128; + int roundup_n = pow(2, ilog2ceil(n)); + + int* in; + cudaMalloc((void**)&in, roundup_n * sizeof(int)); + int* out; + cudaMalloc((void**)&out, n * sizeof(int)); + cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + timer().startGpuTimer(); - // TODO + + dim3 blockPerGrid((roundup_n + blockSize - 1) / blockSize); + kernPadZero << >>(n, roundup_n, in); + int num; + for (int d = 0; d <= ilog2ceil(n) - 1; d++) { + num = roundup_n / pow(2, d + 1); + dim3 blockPerGridLoop1((num + blockSize - 1) / blockSize); + kernScan1 << > > (roundup_n, pow(2, d), pow(2, d+1), in); + cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); + for (int i = 0; i < 20; i++) { + std::cout << odata[i] << " "; + } + std::cout << std::endl; + } + + //kernPadZero << > > (roundup_n - 1, roundup_n, in); + cudaMemset(in + roundup_n - 1, 0, sizeof(int)); + for (int d = ilog2ceil(n) - 1; d >= 0; d--) { + num = roundup_n / pow(2, d + 1); + dim3 blockPerGridLoop2((num + blockSize - 1) / blockSize); + kernScan2 << > > (roundup_n, pow(2, d), pow(2, d + 1), in); + } + /* + dim3 blockPerGridShift((n + blockSize - 1) / blockSize); + kernShift << > > (n, in, out); + */ timer().endGpuTimer(); + cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); + for (int i = 0; i < 20; i++) { + std::cout << odata[i] << " "; + } + std::cout << std::endl; + cudaFree(in); + cudaFree(out); } /** diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index afb336a..38adc70 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -15,45 +15,62 @@ namespace StreamCompaction { // TODO: __global__ __global__ void kernScan(int n, int bar, int *in, int *out) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; - if (k >= n) return; + if (k >= n) { + return; + } if (k >= bar) { out[k] = in[k - bar] + in[k]; } + else { + out[k] = in[k]; + } return; } + + __global__ void kernShift(int n, int *in, int *out) { + int k = (blockIdx.x * blockDim.x) + threadIdx.x; + if (k >= n) { + return; + } + if (k == 0) { + out[k] = 0; + } + else { + out[k] = in[k - 1]; + } + } + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - timer().startGpuTimer(); + // TODO int* in; cudaMalloc((void**)&in, n * sizeof(int)); int* out; cudaMalloc((void**)&out, n * sizeof(int)); cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + + timer().startGpuTimer(); + int roundup_n = pow(2, ilog2ceil(n)); + int blockSize = 128; + dim3 blockPerGrid((roundup_n + blockSize - 1) / blockSize); for (int d = 1; d <= ilog2ceil(n); d++) { - kernScan <<<1, n>>>(n, pow(2, d-1), in, out); + kernScan <<>>(n, pow(2, d-1), in, out); std::swap(in, out); } - - cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); - for (int i = n - 1; i > 0; i--) { - odata[i] = odata[i - 1]; - } - odata[0] = 0; - //std::cout << in[0] << std::endl; - //for (int i = 0; i < n; i++) { - //odata[i] = in[i]; - // std::cout << in[i] << std::endl; - //} - + + kernShift << > > (n, in, out); + timer().endGpuTimer(); + + cudaMemcpy(odata, out, sizeof(int) * n, cudaMemcpyDeviceToHost); + cudaFree(in); cudaFree(out); - timer().endGpuTimer(); } } } From 95142af94f8081d6f979b3635e29f1649d24340f Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 21:23:02 -0400 Subject: [PATCH 03/10] part 3 finished --- stream_compaction/common.cu | 18 +++++++ stream_compaction/efficient.cu | 95 ++++++++++++++++++---------------- 2 files changed, 67 insertions(+), 46 deletions(-) diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index 2ed6d63..c87c84d 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -24,6 +24,17 @@ namespace StreamCompaction { */ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { // TODO + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; + if (idx >= n) { + return; + } + if (idata[idx] == 0) { + bools[idx] = 0; + } + else { + bools[idx] = 1; + } + return; } /** @@ -33,6 +44,13 @@ namespace StreamCompaction { __global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices) { // TODO + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; + if (idx >= n) { + return; + } + if (bools[idx] == 1) { + odata[indices[idx]] = idata[idx]; + } } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 68e8a25..a7bdaae 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -15,24 +15,24 @@ namespace StreamCompaction { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ - __global__ void kernScan1(int n, int pow_d, int pow_d_1, int* in) { + __global__ void kernScan1(int n, int d, int* in) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; - k = k * pow_d_1; - if (k >= n) { + int pow_d_1 = 1 << (d + 1); + int pow_d = 1 << d; + if (k >= n / pow_d_1) { return; } - - in[k + pow_d_1 - 1] += in[k + pow_d - 1]; + k = k * pow_d_1; + in[k + pow_d_1 - 1] += in[k + pow_d - 1]; // 1 += 0 return; } __global__ void kernScan2(int n, int pow_d, int pow_d_1, int* in) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; - k = k * pow_d_1; - if (k >= n) { + if (k >= n / pow_d_1) { return; } - + k = k * pow_d_1; int t = in[k + pow_d - 1]; in[k + pow_d - 1] = in[k + pow_d_1 - 1]; in[k + pow_d_1 - 1] += t; @@ -47,64 +47,34 @@ namespace StreamCompaction { return; } - __global__ void kernShift(int n, int* in, int* out) { - int k = (blockIdx.x * blockDim.x) + threadIdx.x; - if (k >= n) { - return; - } - if (k == 0) { - out[k] = 0; - } - else { - out[k] = in[k - 1]; - } - } - void scan(int n, int *odata, const int *idata) { int blockSize = 128; int roundup_n = pow(2, ilog2ceil(n)); int* in; cudaMalloc((void**)&in, roundup_n * sizeof(int)); - int* out; - cudaMalloc((void**)&out, n * sizeof(int)); cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); - timer().startGpuTimer(); + //timer().startGpuTimer(); dim3 blockPerGrid((roundup_n + blockSize - 1) / blockSize); kernPadZero << >>(n, roundup_n, in); - int num; + int num = 0; for (int d = 0; d <= ilog2ceil(n) - 1; d++) { num = roundup_n / pow(2, d + 1); dim3 blockPerGridLoop1((num + blockSize - 1) / blockSize); - kernScan1 << > > (roundup_n, pow(2, d), pow(2, d+1), in); - cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); - for (int i = 0; i < 20; i++) { - std::cout << odata[i] << " "; - } - std::cout << std::endl; + kernScan1 << > > (roundup_n, d, in); } - //kernPadZero << > > (roundup_n - 1, roundup_n, in); cudaMemset(in + roundup_n - 1, 0, sizeof(int)); for (int d = ilog2ceil(n) - 1; d >= 0; d--) { - num = roundup_n / pow(2, d + 1); + num = roundup_n / (1 << (d + 1)); dim3 blockPerGridLoop2((num + blockSize - 1) / blockSize); - kernScan2 << > > (roundup_n, pow(2, d), pow(2, d + 1), in); + kernScan2 << > > (roundup_n, 1 << d, 1 << (d + 1), in); } - /* - dim3 blockPerGridShift((n + blockSize - 1) / blockSize); - kernShift << > > (n, in, out); - */ - timer().endGpuTimer(); + //timer().endGpuTimer(); cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); - for (int i = 0; i < 20; i++) { - std::cout << odata[i] << " "; - } - std::cout << std::endl; cudaFree(in); - cudaFree(out); } /** @@ -117,10 +87,43 @@ namespace StreamCompaction { * @returns The number of elements remaining after compaction. */ int compact(int n, int *odata, const int *idata) { + int blockSize = 128; + int roundup_n = pow(2, ilog2ceil(n)); + int* in; + cudaMalloc((void**)&in, n * sizeof(int)); + int* out; + cudaMalloc((void**)&out, n * sizeof(int)); + int* scan_res; + cudaMalloc((void**)&scan_res, n * sizeof(int)); + int* bools; + cudaMalloc((void**)&bools, n * sizeof(int)); + cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + int ctr = 0; timer().startGpuTimer(); - // TODO + dim3 blockPerGrid((n + blockSize - 1) / blockSize); + StreamCompaction::Common::kernMapToBoolean << > > (n, bools, in); + scan(n, scan_res, bools); + StreamCompaction::Common::kernScatter << > > (n, out, in, bools, scan_res); timer().endGpuTimer(); - return -1; + int* bools_last = new int[0]; + cudaMemcpy(bools_last, bools + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + int* scan_res_last = new int[0]; + cudaMemcpy(scan_res_last, scan_res + n - 1, sizeof(int), cudaMemcpyDeviceToHost); + if (bools_last[0] == 1) { + ctr = scan_res_last[0] + 1; + } + else { + ctr = scan_res_last[0]; + } + + cudaMemcpy(odata, out, sizeof(int) * n, cudaMemcpyDeviceToHost); + cudaFree(in); + cudaFree(out); + cudaFree(scan_res); + cudaFree(bools); + delete(bools_last); + delete(scan_res_last); + return ctr; } } } From 73e6648ea3c74527c16a1bc18533fdc7de452ce3 Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 22:11:04 -0400 Subject: [PATCH 04/10] coding finished --- stream_compaction/efficient.cu | 8 ++++---- stream_compaction/thrust.cu | 13 ++++++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index a7bdaae..006b314 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -55,7 +55,7 @@ namespace StreamCompaction { cudaMalloc((void**)&in, roundup_n * sizeof(int)); cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); - //timer().startGpuTimer(); + timer().startGpuTimer(); dim3 blockPerGrid((roundup_n + blockSize - 1) / blockSize); kernPadZero << >>(n, roundup_n, in); @@ -72,7 +72,7 @@ namespace StreamCompaction { dim3 blockPerGridLoop2((num + blockSize - 1) / blockSize); kernScan2 << > > (roundup_n, 1 << d, 1 << (d + 1), in); } - //timer().endGpuTimer(); + timer().endGpuTimer(); cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); cudaFree(in); } @@ -99,12 +99,12 @@ namespace StreamCompaction { cudaMalloc((void**)&bools, n * sizeof(int)); cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); int ctr = 0; - timer().startGpuTimer(); + //timer().startGpuTimer(); dim3 blockPerGrid((n + blockSize - 1) / blockSize); StreamCompaction::Common::kernMapToBoolean << > > (n, bools, in); scan(n, scan_res, bools); StreamCompaction::Common::kernScatter << > > (n, out, in, bools, scan_res); - timer().endGpuTimer(); + //timer().endGpuTimer(); int* bools_last = new int[0]; cudaMemcpy(bools_last, bools + n - 1, sizeof(int), cudaMemcpyDeviceToHost); int* scan_res_last = new int[0]; diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 1def45e..53a0b39 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -18,11 +18,18 @@ namespace StreamCompaction { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { + + int* in; + cudaMalloc((void**)&in, n * sizeof(int)); + int* out; + cudaMalloc((void**)&out, n * sizeof(int)); + cudaMemcpy(in, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + thrust::device_vector dev_in(in, in + n); + thrust::device_vector dev_out(n); timer().startGpuTimer(); - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(dev_in.begin(), dev_in.end(), dev_out.begin()); timer().endGpuTimer(); + thrust::copy(dev_out.begin(), dev_out.end(), odata); } } } From 2bcb5f688a8e64b6c4e044a6c9669c9567ca5293 Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 22:22:58 -0400 Subject: [PATCH 05/10] analysis finished --- src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.cpp b/src/main.cpp index bbc114b..e5353ec 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,7 +13,7 @@ #include #include "testing_helpers.hpp" -const int SIZE = 1 << 8;//1000000;//1 << 8; // feel free to change the size of array +const int SIZE = 1 << 20;//1000000;//1 << 8; // feel free to change the size of array const int NPOT = SIZE - 3; // Non-Power-Of-Two int *a = new int[SIZE]; int *b = new int[SIZE]; From 99f7bc0285bf08cba8a5582e97f5cf8a71f74af4 Mon Sep 17 00:00:00 2001 From: WeiyuDu <32231561+WeiyuDu@users.noreply.github.com> Date: Sat, 26 Sep 2020 22:26:21 -0400 Subject: [PATCH 06/10] Update README.md --- README.md | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 0e38ddb..b70ba08 100644 --- a/README.md +++ b/README.md @@ -3,12 +3,10 @@ CUDA Stream Compaction **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE - * (TODO) [LinkedIn](), [personal website](), [twitter](), etc. -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) - -### (TODO: Your README) - -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +* Weiyu Du + * [LinkedIn](https://www.linkedin.com/in/weiyu-du/) +* Tested on: CETS virtual lab MOR100B-05 Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz +### Analysis + + From b200bb6d8c46c3b643daee7883f3f731e210541d Mon Sep 17 00:00:00 2001 From: Weiyu Du Date: Sat, 26 Sep 2020 22:28:49 -0400 Subject: [PATCH 07/10] add image --- img/hw2_nonpow2.png | Bin 0 -> 21879 bytes img/hw2_pow2.png | Bin 0 -> 21984 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 img/hw2_nonpow2.png create mode 100644 img/hw2_pow2.png diff --git a/img/hw2_nonpow2.png b/img/hw2_nonpow2.png new file mode 100644 index 0000000000000000000000000000000000000000..7a81b38ace89bb139f5004ba2ee025ba0c49e418 GIT binary patch literal 21879 zcmdtK1yq;oyDjfwyzxBqnR7nh-}18Jn`m~^P$-m55|=J2QYh5! z6bjY%^=t7CUv*m#{#Q`aTe9gsdR{h&$$5Jzjp9k? z_i#s5`Butzo9&vdPv1SIP#;d+;ww1DY|^JIz5lGwUuQZiZOae! zzv_QI8R*N)n^DPjgZIl$Dhj3GxaMmuTs=jSfocn0USIEy7dtLcR#Pa6JE`|kDDFjD zDEQ*h2bA>`%2l4#XYsk$ehM{(a(UzbRzJCV#`1dsRkO*-Lo6((bDkTYwD^8jR8&+e zy;ET9gYVc7r(7$wu7;wbkIBhEA0Hn{mv$YOX+J?hL7VaK=Oh&E?9wCTg5%BFv!m40 zj3#DgRF~$b`Rs--FD);O`D=A_t}NTj1e}mqm>J3GYf(+m=G4*AVG?%!9pHa|sI=Jk zi0xSJEc@xxmrn-DxXe!#XLP2GCS=;3yz$7=ZfmC9I$B0~8=F+U%AmO0tJd-7yf!|L z>pOJl&_KcMRrhDS{{CCRk6mTLc}_di;+%lpuzYy{A3YtNveo@h6E(H9Y1aL+djT@EU@Y_yI<-Ay;FZ%pevnk&#|JhC4#mKN?e6u^%D(Raydvq80Oq!}v zjT_@X4`pe;7IJE6K1AnLRwh09mD#Olp)Je0Us_gHmQ780#(A#SU(9Fj*H`8d(<@i5 z^ksCqme$S&uPjDByV+OqCU7kI%i}%zn4!PDy-zvKj`F1$)gOQM>{;LUk9QlDyN?7r zU)R*qDi68f5pQ^gmFa%&Sp3CE**DUdEAG}YqggdNW?Cwpbu+`wp$nrqavwi_oERUM z$EVXXt>!v*dAUxrMpbXmMQi1xf5^>MNi$NJ9;}z@bX&>2kl$8#d9ts(_Gfh{{qEiD z+}zx0#*MO=3X7i$la(Sv9Zq8^w{B%(!O(5n_U;Lj0Q;FU%^{ti?r*4%R^gcY{rg*E zqncKZU3tn;3CT;BA&08>AvwAh_v19U~;@+yNsxR!uI^wVziz_ST@N>t3aQ}%|ULheZZ;|E9ccG!Z zqoeWBDltYq-=6V1&6tFwJz+efC@d^2DJNGsso9z5LXWEyS^nMancj{i);ly5GnT)i zt(9%_EI2rr-)@*yq9y5uhvUq!VW-Zg!ovDc@y$!~LkVllf;DkXzK3O!<(NfWHQknm zbm+Hk-M@MB<~3{9M9Bo6#P(1LNtlzI&BeqTmA&MNR!@t@u%z0LX=Cqw(@SCF;ZeA| zZnLVq{8NmwZP%y!2RK*CUhzg>PI|XIlab#WHHJ&FvbC)*<<8gqIrQ?3bwt1k!vok+ zb8~Z1+PRLqMO>YO=B>p;tTRl{c}vBqr%5si*genB&&L7lBV_i(Y3F8FIda9th6Y-Yu%=e0>A^#|qTJaICs9WkQNNsW zhNG9?Mh3g8W3x@n&Z=SIrDBZ=F6oyCgddg(_*Pr1bo=)0?4_AjEFCTn;jFPpEVyWV zXJUF<1^evFqg|mJ7`QakBi_GfogQvh3U--!wfmIWQ8_ue`u7SK%s>1R3(E7LWvW@V zS@8R~!}4HE0{f98Uppdkn}gM%9)-@2ck}s_>(!p>biQG}FjXt3o6^iUv+&MYhi)d1 z;~V18^tXfF?~UrCt?cd9@Z~9dr!_-vG%w$%o_(Yxtu(Ka!N&1({cr-SUWM#i*0#+e zi&v?ssim>?%U++cmcD-d-7a3eN0yeBJY*5fmYT1&z*^SBMECj13G)m!#PO2L9;l7< z{xvaSwe!s79J{z@HxC>*AU)QRTb`_2TK2(VQW|si2=mpPVIG`ojswWxjWWc7@@(6o zvL8iWx>RNLnrMV1yyqrPD`I_+3lu|ABRCjmx4cFgY>zwkNQ}p*p zI5h0qy4A9IYr#lsM&NEfqnCTkGC!KMWmfs}$*pD8ySm^P&d06(f zLGWlxTFhTGG{;wde-)8NcDj;$^KXZlX8m%+PR*a4{n%#M<^egQ*`-Idvg@3G6*2UE z{(Jk(QYhxQFTrgoX09=RSyy(ebq$36k- zXT~4ztP%5Z7*6_1=J?NT&bHf^k9QSL)&=LR=-<5AH2byfYfZS+W6WFO`)`=6$K`sw z$YwmjuJch>(oJ3sG^a#b+S>BPsK&9|v?5o1yyf8V3Sl|9$~DU>qq{%YZL~_*Vd7?Y zf4}~=?b|2ItozD#)grbi#i(*x{~o{Qv@kc>FH;*KBZI|R7Nr#Vi06yo!f?u$qM}2c zy={&|afeTyOuuBiLuK@&x2Nav*-p3oD|yaYR}!z@Mr6p2Lg+3oDM_A1UW$HpbAEbA z{{^>>Szp5S{wCrrF5F{*s$&YkOPx9{ZRP>nrS z_+S&SPZ?Il$kLZ5OvcVGE;4$05jYpndCOhxyN{ki#r2pM|yR#=Jgt z*w3ADTmE3!mYMYH*Ds!g4-=je($XikAC^95W@Z+F@bKm3O6RP3X4hJ!P_a!5%fT;R z*fo#XN8OoGqZ6UNx~nnGSWOArN6`G;wZaFx5G<@GdL(X0SJP7I>*~Hjrd7(ZGvzg^ zV|8*G^*Co@5_{q48H?}kQSu>o9Das)V2Bry|HOP=@!Sx*J#^aU=Tl2-YhG`mnU@z9 zu#aU74Blt;2RY$;H}&-c$H&L18yz@3+&64+Psp2(zR(#V6S!NVuU1YZEn^9}`!Fl( zw>HbKOf%`ff8Xl<@#A{lT;Cp#IT5VEgT*Ap-*4ExXOFRZhS^BfOohPMPKmySvHVkC z=v-QNS^wG~<@1Wq_{D`zAd}q^dc{76)-+fhqN-|aY<$FcCb_CnmCnC)wk6#(P(P@R z`_A&xg3k2B>`1G=rlw|@aYC-1xd28pN;4~Y*9impkW$8zCNHBdCtVM5S!SfH3=cP_ zL?8tmK6bNb{ z-@AA3WnXz<;PCKp=?u50kkiagH#fK1NI7P*aL7FdIrP19UO;TB#ZoOoq>Q-SU|P0f zKH2vDnQ`?yQ8Az9Ov_N7z8@=B9UL5b2kd`V?eFgDx;C0KChVj!x?qU!(7NQOZ02Hw^;0Y@$6YheT>==R)v2?Mn-aS^04=4dk)9JgBJo%nzCCg$HpF&dBd-= zY4?ezIXO9PZEZck8yJ7< zQ&M49;(eAS1VaL5=4Z#oUJm#m}3CZXf}>;CoY*XvRZl{^~jkyQ_9t>HJQ+GpC7__1qCJ?6Upo#=+5 z&z~=`&!^|qy|M2YIHrY2qY|wwAtyBbyu)QyYoskJ<$d5`9ZrpO<@4w71i39cn6{)A zcgfpci?``qm>kM%vli8IML4QU(U;4etPnW9lG9=#vZ7*Y&Fd|BYn!O?Xu7CSnK0Grv?}2CLOpuy&gW4 z2Yi@dG|Ol<8N>_+>1xlPk$;K1h?ej@OlwM}j?@z_3w9DU0er@FZ_lk-xK^RLY8~x! z9x>yF*k>$hhP7frK|wQ0oXC_>Gb61r8X0CSiP!E{yb*{5t`c>c8CC%HVbN-z6}>i= zBNcZgMGVL@dni6NNXBTmf+Z%$jD2hew%EI*|Za(<16I9Btas4DL8 z>QHgxPxt;B1d4dUsS$w{KNM)5rvS)dgOL|px^Usbq^STVAk2&TX^dKeYGb_Cbd3}@ z+sTuvh{(Hzoik;Fg*heo9jD%hdG#kQX7%0Qz|b;OuBCTw=qMX-7;Ww7=qL)g7m<IFHllcEsP7TE9hq%yjkEnO zGm4lK#r}#nw#c$^npui`$Onq$q6FCAzju!nTN{F(Uqd^} zr$gtN$SHjW_d8^Z#>dAyj(=P?{WH|NuQf7JgRd;`l-$z7Y-3%N5>t!Y!pJF=*vmX} zD~ow`N=iyh!UhKq9-RKTj`@<0>-@kGPW9AFSTBmmz+|7RL@A2;OabpTi3?8n=9JxO zxYD4E_q4UOvCvg-S29E>hD#2%=VSmZgaWB(r|4H=aYO<``0MZTZ?(%>UcB}?Mj=#e z{&%;x?xSk^PM64g_g2fiI!+FPUl}w%5E)Ffw1=<_DB1D|XTYVEeOl0e!QK@M)V5aURIh;BrAwD? zmsY&VfBW>-U>%$7V4cLm>{uiA{K&x@MTXy=?&o)zPu+de(3)am_Q;yfnDsW`^I#QQ(LtF^( ztR-33%XMj5&33fy3iggs^}G8dIvf;#@?Q20zvsh;Pi<^F8BmXNsbpE{ns$7+@@1!o z1FGZ*#4F>L7vV|y2i#T{94~8a)j(-MN>5Vv*xS!?1a#`9G_NF_Zk*^XC2N;#Be|6| z8+I5>{j8oj_50)9b);yU9_tk8>}`xv60a1Te6)jG+eGAZHGol8R+b^c@cq5o?@{R~prpTu z^KXkyyN+KpG&H37>#u6W7#Go@$zQ*!LC{14pG^Q|!y9Tk_m_ zthgDqEY`RJih>Wr*M2V6ArbmamOob}o}W zHImU8@4@VP8RHvp+VUL&5T9AwX^9%LLjj2gFiEN?ZV4&EXWZ~JXDqL_De1-`tq)9} zp0g>D8h8jBUNyt49TDLWlaQmbM!Lx}Z*Omu0!JAb7=%zRIsz#V4=paucJdk4gdJd} zu3#Ea&$I|ZS+O6G8{zaPO@>7^Fsh;#1Gj2xx+#gUxQ@%n^ZdZnsCW4E%67Kj>VABm zD^$v#D_S)!V6Q{ZRx(nI%YL4DBQ%pm3_3pnQxt-QU6cX#ciou6ZXZOIqlVCZNNc0q zX2EgyC9ycN7LgcFV17Jw!^6YbxVU6V^2P=~A|fI}GL?GL_4}il-A~RBW!AWiNooyD zFyH!41u!5x)R@r7TPdg%#{HFy8iJQmZNy{jjpqRY(U@c~O=Q02DpD-FaSg{W0uU+S z{FAS(NpvX&lVzSa7qNEZ&bP=ggYh||iWU}$PnbpGk-1dAdNVhI&DjtegB%vE5c=Sx zX-gQQOlxapu!woSZq=h*ywCmodJ~u5zTH219TexE>P$8C1vXUcc^2_QIqABn%*$ivu;|oQmY3=ibVLY@z?G0_O%iYH z(1y*M8%;CXBLHgHjvkdJ#2qp#pJ~ep2PY?~>|4G)XRQ0)i+k@4IIio7N-xcM&Wb?0 z-2l~vn|uTLj3Xrd*g;fPccr{TP476G)jzzFSCRHpIW4Ws$dw#WOhTDoO@2&BxN?^s5fN-|c(cMJhV@hDAhwyx@-y5Xm z<8E`MI?@=zUo$f&$yS&Hu_Fy&&KMt5o_oM#v@J{T_jnhnt8U%8^-{wutH`81yS`%# zWTf8rg4+?G#J+s}`t|qxd;kd2z(#GS@~(yrUv{S4EZ(-AfkA(9Y3Z?thsWQK9-Y9j zss(9iepp#vh_D-J`O@Crj`;VrEPyZO+o?OkC<_N_!o5iO;4?f)XGcH&tNSa{3m#G~ zcX#{hH_vGm+o5KkEaR(fIbYCLaTISooRAT#sy_HEm)x3#y+0F{v{whTeRKXSP(#@*VO>!)Ru?NGz3y+V#J z3kuGX2_h2(Lc-fQqbL}w?4xqDa`8=YAFWoi3wQQ{&|$3^;`;WASDz3@(s$Qxnpj+7 zc9~Kjzx9aG0DcYZx05UidFvAKHl`KJ+NCzr^2UNwy8T!OZHDski0`vXqy1lcFN%o; zkkbOYeLewxJToUJ5Ud}eQv_W$?-ev4g`pCX<)4enFw>fxDCO>7aInD2+au8z>Mfj{ zp{u92o0*xJl%M(ZmBAT>y?NX;PM!2UV1V?H;z^O4nU(bll^8GLp@&P=%Xd0!G;0<( zxV(e!+=a|R$G{NjwmcbJxSdJy!7<+;Q-oKz)D%!ooO8Y_UxeNlP3+RXX`S| zb@csx6aLzM*Z|?b1;kxjTgmK4!d0Te`su};XJN;7_L(L9F?Xw>-@m%&K^ODc{FFyH zt#gh3G|eR_s0lz&w}Jb^(|%QH-H$*o@nHBhu)jEGyEbQP|EgFmZ^o&Ur-@-zynOj( zheQ9H(vJCF#|f;qZ-53Ko0-#f{!&*R?#$Qe{_?oiWwyP4Sdl{KexU%m)Q-aaSN{Wf zs!*k%ao>?6;^I#j3BRp0YO|R_iGR%_7AY63iiM%68GB}j80EuSe1Xk6LkEWuodo+# z*D9y1ZNA>Uu>d@KO%c<#&d$!_ddOn$F3HKMfwEn4YV*X$`(_gp^7BQYh`A3v@{{XK z?z?S{V{AD1-zsIN*CpQDbpxRE=kG?+r=e&6iF%@>tn4{rW{MOw<@v#l9M6x5 znOUSY*;6Qo?wjxYZ-%m;+(v(C@e5kBrxlET&KJVIHTZbNq{OgitasPdw0lzhV(EzO zu+4)zOM>;s`Q_ueG-WnYSjFwQ{8R-Gg)`5soLsSpYCRG4(X=8ahLKPV=|A$>vK?A> zPdl0WiaEcIbsaOUDX(oXC~{Mhw8=%v3^-{T01opIN(_|*Z7uE(<9BEoPwxvXkATE+ z8RQU&_5{F&g|RGr`0KB~hWz}x8F9+d zP}Rj#q8Iv2)g;v)TyQc{fAFB9Rr}q3i?&Qla>m2bFW-W0F+!0Eq?RPI@_DiM(c9=1=xYTaPj9pcau7KYg zUmb5_>-j0e+~xTpBGa5eC`Hy&B5{7NDtx1#)>ldF(&Wp9}2KnZ7PU$6>ZgX>HmgTII{`k&3m) zCxkXv9cDZ6{RihY&p^y+kmDem&|Hr%Q7FJR*)0CHTpBOjJX83I)Wv|daj3|K(i*jE zk#SETw@TDJKGnf$Zf;KMZ^Bxpn>4=zV|P7aJ4K#0GgJFRed)X9?B64H0~sAF=dWcj zWHDd;>+0&njaIfvhSw9;`b;c~D2z^h*4`vJ?CK8UwF=_H6xiTs*QNQv){F!cDtd0q zz-FlOYwN-!w!2K%s}ZtC1$CZ;;?=8fK@LW8=PxP}FmPwh1`vs1+oH(nPh`qu>p$sX z?rR!lc1?P-KzXq~aBlK14c6qTDH|f`adOIl+oP@2y?K*t>@Vwg@c$Pt03z>;$o_h! zXsutJbTZoMwOx0fCQ{^NhU?F^U;jk&D&;09%(KZ0{@_5Gpomu#ip z=XsQ!oxPG%8(x5E53jQb0sjNa|HULJ2%1Hn^l!A*QYfqxsx3wF=AC(@PJR8)JE!h& z4N3mW*y`nlH2&ekhkw0&&|SB=BE06&&Vj<9njiU^i(`*odW{@<@3JsM2%I422?-D; z>?ckr6R8QPcHRb<`Vz<_RFqW*y#?R#>Q^)c=TB`J2RjL=YpUB@gg^Gi{L}z#{?TMG zHnw9OT6uYSr060v3Q=k<>Kvg^R7ELAV-THoO#9b(pH7dA)HLDl+VUHSq2%jV@xW8& z4N}~>VQfkf1`vsSSWp7w{`#_Ke#~tp27;J8h_GsiU9ZNhK;XrI*c?DUwlWA7amyoV z8wxHY!uQyFMo^B@&Dx{sIaGfFtYrg#NKWxlQ?9D4E=XoQqM`oldDx<>{j8fQ#~1Bk zwevtbv82ogG8^p7&nG(=dritm>v1(=j5{v}) zVJ(!weG&nnpGo~^JE)AG%bjYoxQmMn2N!9@wI9*lopN~xQ`=Mfp32+LYBuZqbK>D+ zOz;Ch2bJ(?h}Vh%ofm`nJ25$_gk@>_b5(L~m zeBhG`nKVu-M->A;BMx-LLfjA>QU2>G?z#w!?|8+%GCEb>Lv<)jE!R_mo=Uq{@IB#` z4<8mo&D4-?+ByZwS;_~xw70WAu_xfB7g)bzM-7L%`8@aa4Fy7EgR!hS4^$)j_-o1O*)AxnzFUnbo8>Wa+bnbphY*{xo( zh+GJ}ghL16C&R?`PIRPs4c$>^U83%^6Po`n*GZv+z;~C zW@xn^w`^y9;j*6cRQy_Jr>X*%Y$hpA3#UNP6FL*qNbk7ycDY{8y8^Yr-ED{Khb-Hd zw!v}4+FBw?SxmvXNR+)AvIOKA3OGw{2^1&U2sVYAPSG{3ApK=qblq{`7ZS?7MZ@*T zD~8TRsyJaCj7nlz0v}6uhJxK4>I*bim6n#iy@WcMaCe+q*~%Ms9DRzk(~@c!3O0!D zw4Af^2k3;u-VYx>^ttsHV&I`@&DX1LMUj~;_2+IR#U6GBy+a;=RneC((YSuc?#HJl zMtnQhU%hzoV)2_*>!ofmQ@i_oJmcOwyk-1`w@^g5l)n)&yc*qdm{-1e>pM}e^WqdC zLBXD`F3UJw;H%RrS*KTGViZQ|O zBU$YkZa0Q^Ao_ytpnjrBKww{ zcwqSp^T04c4|i)Ts}rCtzPa;tVMX>38jVWv+$bqNJ($8VY%u>yo1jDt6+50Rcw}8RBgq6Oxv?nAi`H+pOf>Qo>~BE z?CtIjCE`)Izjh6%zErDTNr@Uj?}GzBR0i4D*k)kfl-%Iq4%q(MBsa5HbXTvCT=}0AaDQiir4 zA^XP290b-2IBq~}ibR2c_Xz_nWc=gv-#%er(T2b}*B}7aXIbmRwAF0fJ${^0uxlsP z(Yxu)twH)dV~S#8QT>W(OHDJ|uDBtI`#pcoa^q3n{2<5lXuAf#R>Giw? zT{Dx5@n2A}*mQkbV>{fGbZIHWv^4^tg}+WML0d?|>U}8KU)o8?W&@~n)U*KPivHeO zaQr6%KvBz>B$)$d=keQlOEVf-nVIjQpAaUzAxSA@zW%^ z2_fq?!jA-8f7teC)yEED-4Z&FtrrGz~OwXry*<*8jRE+*SnH)ZptA zg7yQI>s8;&f9yH~(}+mIHVQ?blvn3EL^qre9kCC$s$BJ&=f(pJ&Jk3tfOKra!rGww zYLJClB%qyFA(M<88-x0)94X6`m94#8H3hkde*5-NQfDI9WUMRw{{1QpX#!J^A3v@~ zl0&}Kj6F3Hiy|LDI@NL7NC^Cy{|NJ*JuxsWaj>&XnwhDs%n)-&AuAh&68`sv0$q8; z#`7O8-1Oz_>^Dtjs}oR8`j~VmdAV(p(qA5^zv7UK?P`RXo|#->F+{zq4)rd`Y4i;I z407|>M-ZpXdw1n>l#x_M#4)7Gvh9bt2>>N=jKnaqK1RUIV58UK7N>?1^0lXpr>CX> zyoAEx0#bzAryAjj47Lk|y}22;scLasm{nk0kOSpjO*DD?`EIm@Qo2u#LG7{W8C9Q&dj)L+k?O)K?`Bs$^72xGNmpo?2!|M3dAJK< z?%^KzVbFwJJB1BKU?#ErMM%H;VL^3Cfl&2GJgVUApddYPUbyAtWQ-I|f-H!0S}@Bx z@F_MR(TPXRnRw!JC5~JUKrv{Hm(Elntu?Hg)kCd|KN}49U+|mlu_cR*ZqI;ZvRL|T)ByS zh7>Q5Lr0;=Z(Q&GEtF&pcrH>(pR&LgZ3r)71=lJN2lsWT*}A3e(ko2cBVxP1_U@o# z8S5**Y0bGHvSt>M`u+L)G~}CG4oCNG@La0{R^!)m2YfwNL8EKLj`1|ie z=@rO|Mi7-95$%yraS1lCWJ}aw>D7I3um&?912VQH*D1p#55#>G03MXt7ertI3AF3R z1k*=j)Kjoh_8c>ZEsJDc4F4u(Va=MNXWuYY_^JY-O$ySm5fyM!e!=Tu$%+t*Q=aUt z@7j7$>geh`@GwAcetTViU;8J&h;H4w)hCQNH_3FNTw_0V>J*k>)3pv_2!=6-R0#yn zq~;N+Jt21{V0n2q-x#u#I{bJr8=?}zMp1$O1vR)xdjo}%X$OpP7vuIs_Obt?;(m#P z%_jW(eA=!|0y`GEzJmnM%E>7Orx_9w;u8jrT)}7R(*|x*H2Q=k-S`>}aiZkgH-b!A z*m!xBtotieu%@3i664VQ2M?kV(W0@(LA=C62$J-nUbU)saF8A15BMI+_ZA-N%WL1X zW^-143tk1%F(BGnBg0Qy!G{p)EB0VlSx69_N&oik8wZV>j}0oCA-F_{U_*w6Agz*; z684}wRFt!MgavB&g&>=dsp+U7K00>`4BgC0X69hQ$;`~mZ;OU3lc41iR|F0mI1dw3 z1F{|`JkmlF-wqnJrpH}i#xnbWqde8vDqii0SdF`fEg)NM zmX=CowA%%C-0I$h)NcrycOLc{uiPCJip5rJhjSg}fyJCBl&^?;_qpwTTYfm>=@y4m z=BfvV;0LS$4`Yi(H8(%c|7b!hik69=b^rcz2-(o~{Rke((zfcjjv^~LCB+t)y#zCW zq4fhV0U2l{EmkLA8;;%t=)=nRg|Vv(3J4|;6;I3SRrYs(T{8b6mmxgYY4h#3WlE8!Zyedo>{8yNdyv>P%lyX%0Vh=UjDMU7~ADS9#slVAbrP(4SYbbgOaMq(^6 zw?LVa@U1?{SJbrMg3A}@R&&x_!N{N-dH&+XVOY)pr;Z;_g{TB7pf$lK z_ebxM!-GHTZh34t-*)~2{c+dVi#aw~5iJTc^k0QnQ@+I{j+w`N2jAM)8Sb^nZ})-A z?JtxO&$R&mD*6W5gZF6g=WN8}Jh7nN_y2D9DA;DlIa_r%Eolrei3SLN3-lhO)}PP? zXlQ7BVE$U_uXX?QZv->}F`%m}P)*BMfYE;F<>duCx}T5FJ8=J}a~L%}h_gP|(Kajd znEYl6t2Q`lVjcq?;o#u#>P#u3hlA8FFmO-k7mE9~bCjdkAicvxD}SGc{lVmp>--mo)4@Q@l%frY^F`!l6dnB%??4*fbt zb9Dhi(Ll90Gu-5X1U2gF>PA7cC1(2Dldu#kAYRrJPKijK&@11f6x|DBLlzweYED*{ ze>yvgMj(XH(9$*#hY;eN^TO0ftJ=W8z&vr~nv>ES{TN|zGte0d;cc4r>*dwe15nC) zh1+3Pl7BCA9XaUk_5!mnfp&|diIqTd^#Fw)=|D(ie==0~#Gq&IlD7@<51>rNrnG!_+5k2F~Fr+n#qtxF?;R$O3 z{lKyKay{#pC%^O<=G0*=W9jL+0o8LiYI0L3XAP~b?Wl?dlD_V(g&CN@ca)oPRe1M1DIK4EZt5@O2J+uNUobby&0s7kyG2S>bmzLa^ zJeL?LbE7~Z5O{d)+O?UlfKekZ2n}J+pVKYaU=j%YHU{W6f(Liowr!~vUFV5RU-75< z50B97SeKY;!Hi8AFXHUx@4NLZMw*W4KePWS^Gpto=??9Hb3tyfX<*};1&3%|o41-; zCKX&UZ3l<|6RG|W4XngwxN9c;zY@W(FJGA}+J6;6{{+W!;Jc8?bb)&!5|T2dRaFkH zY0z0@qhp=_C{dB$DtH1um_$koSN5iZP*!R_GdEq3w(rl;o03;<20hD{+9iSGx8TJ9 zz3$f4xYh4;|56N?J)e*^u-6BgDpM{mg#0AkVg+pKV(ZwC7AUQ0A1;dUQd^bri1uiC z^3mIWzWJ%XDB|zj-Io-Dw!9P&N>W=jy6!UdpPw0K7u_@LTa`q!$wIJf&+hM0uQq?S zVlLSB>G4}Cio24n*I!EgYeN>dU1Iy^-mPtuqEyODD!2-dpI(iC^}k{-T14CG0w7t~ z5w&SHZ}vwYhR-1>nvq~sPpIenf1v_=p;&}@uITC;ruHm_PErps0ciD-eTReov;J0K zAME{+T38qlDeT2&d!=AT{hhdkQ1DHA_HUr2RRz&5_4WW3OL9~jBXyoSVRxYt5-S$i z*M{a^oZxw;6)*LzaJ)U`?Y4Mr(eM+Q3J0C9VBU#NbCSX#K(c&khELBuIuL_RJbdW;X!>z z50gVM9Dz|5p&+`r{vU{abiNY`Bax1XO5BQ;bO*_6TW z?2l!4k8D{6j?211XdILiR*Jg4dq`Y5Eovv5uH82A?vZp*zf~(CGlpLe*KozP{Sye9 zTuq$*Z1!E@K~nWIH(I5V*p2^*1E*a_ZzauKsQ%|lRy(YtHx~IP*qs*r_sYm{%`0=_ z6bzJX7s`?(&aaYxcKecYX=EmJwto&fK-Vl$+bXA)XrFk&>7o6@h>YM07mC#SQ+LSC z)Uze~{y6aC_o1hX7ylgT-wK=ux)$r9ME@_`nOY)4(ad)R!W;}oF#_oaj=P@Z$yo4a z*ml0yS*S`vU^>a5W5>)}#9M8rpm2ym8S@hxn_Y1Q?0F2RknG+U8mZVVM8z!h*xZPP zE|m${pVDtd5^gUl@Ou0}y=QqWZy}!Gel%iT@Aa3mWJ*B1#ub2|WVd{|XbK zKr`;!l+45^m7x54dTt&%asp=ysg`vd7$F1J<4VXtVUF-{#Eg z|2J&*Mhnn_l{zoKw3?({3f-pvOepYwNW=9$%-?^vdAvl0;@tnno{ou$lXxhN8>HbA zSGny1&lc!`2|*M)l1bNbq#BYifZO=2dM*iplp=~ZF^smOBMjzB`P-Af7Zx}U9V+VT z8W_0ZsRBOYc^-Be>KI_cKUlkzcq<2PuiGmR(Zr^dGsEC(^&lutx?J*H z0I(1!`JZ$E)KmMz^p)W5PFXm3;J~}fpv5Sxn;%snI4|G!O-*rM@eg|WK2)jO&$jsN6Agp+=f8OSt^`+% ziT`3{IOy&}%8^`Xl7!9y4;l^+J?Wh!?X@gXuv3DQc%8cf+w(LOb`6o0#T2kD)BgF3 zqBz^kGT=r}IO^Q-htFucik@3$(eo2 zI(Ff4cO{qo`Yj6Av=nHwl-E2``EW5j;PuYb=>TcDOUKXg>O~Ywq*A}=KGgD_cdLv> zOjKx4Me3R2(@_<#9Hsb9ZlS;NLF~@SGkfEN)_&L@z{;@oe*5fVh#4)zHK8AQ8^&gN zFNk%_ejOh#6&PC>Hn;DD96^MS=TD#RlVIoM^lM*s>CCn2^>m#{366@5m4;V?==!92 z8@~0xX-w}`wg;u*8@wi23Jvrk_cxnlC!UsjCMR@AYTWp2}Yd|DSVfAZxTd4fFHOsJ?l1D%FwnI21h zj~namtLWOaZ_S@YYDd+shHf@n|Hw;emcBG=^LLk;Yi6sk-C82wPuYkH zPd_)xI~_ak#X#Sb7K0-vPuMdnkRuP^$fk}l45^PI-Ld$OKA=X@xXMF!7F)?Hgk0w3 zjpQ^1*IC{1){YBQfE0;4$tAlNk!$;({}bwR#C?NcO*4Pb5-*#izIWLV?Caf|sAv+` zy=2yWi%8>FXp*3t>F{uqnzMvUq*+}chq$=8esCl=4zJ|%k0m*hzCU$}0)~l3J zngpuVNAEP6gm5ylE`1rY^cGmx*?Ekz+<6)AzkM5a_Dqh|?&l&B>>n>=Od@;Qmy zFGees%6=WUjefx;vd^WlG~|kRWfz;kt+@9ezNJj)TkjdPZW(8k`|8+qvnf0|;5fY> zgW=N~Pm2w^#(YAJ(!N`yUAGggx~U`W_dKOKZfrSAz4)$?%%yb&8gyqxR4@&;D?#Bp zeL1^@J_$$823PoKYsvm~PmQZV^Et~V;n9ZDITghFI1dK_rJ26>AV~? zi`%=RW7n>!@}!lST|+kZ3>DR_`;Cn;AA_aolXC=fW^T~?An}4M`whrgC&2CBWzG^z%%b?C|!+^hatx}@@Y4JW>;M_WXMjGaqgJkJQ zda%2Lp-}`pJO2mQMHBU;=oc{zLlF$alMw#J&HY2LQ9+?2lVOHtH2MOkyZwnXPnXrIY>MUV48e?L6eM*jjaR6JKTH{tsA7(#cjSe_{01gKR-X1GgL)I z@54Pby5yZ%3)s+Zf_;q znMv7<^bX!| zTR7a&&RK=J*stTKf<9r(N84M6D~I7uEdCA2yVLRmi&nm?7A|a4-l)e7l`hXgg_z%6 z{`7PW| zE*BZehchKRw!D3~|1Q0dS?qxa8zUB}MlN||Zk>XSrN=6ZuJPxjdmRg|#I#6tGRuW%nSImqUyd8)%Vng$W~WSX_qKURU(jmuSo^8T zPBCB8+2gJ-0!6=nKkf{p!h8pH=f{x^=`6cWvK<(G@NRoN{--zR8+PCJm1pmrP23x0 zA5~X>+v=FhCB-nzv*}6XhM5ZTLj0h(fyMPl%?eZ$;hO!blpLFZckb@)hhywva+Oh? zGqXO;NNxsie;tUD|VJQ^J}iQ%7TG zpJiLqxQT?#b^8|FI8@b?oU5Vx4%aE27M*83%^jTl z%I1&dYF<7!I~=i!@>|`5)%}l84qauN7*ET?Em#js<>dF`#UL!RzAQ39f>$6)%zij{ z3liUMp6mBsaA`#YIvIgnVP|EnLU(c5BClZP)zG-d5(5#DzKs001_k`o>e){oSH8Rb z#;tu+lpS@~Xs%*|5}}>6S@u^3tATI<@*&M}5{ia~QNuH1IuobC_!0vN;ccKTO+!c~ z0xQ9NSKQ8T*G#N#pdLQC$t5$TnTJ9-JalZ9>cpzqt<;paOMo&TJJx{cb8jH<;s00i zk0^BBk+{pu%~M-fH@sK^$I39%$%tDt%%m%Dl2=hVBtagULOv*HkB*Keg~i>YHhrb}E|l0h0-AcZc?`awAD_F{{PWe9N5+oHbzBSj(Cy`rJXq3c z+7O#d5Rcd&#PiY}YQWKB0;|;-;5)$sI?ms|Eoj`i?f>!HLczJU=6`gMoqrJDe}I%k zRCIJmS0H)j4!Cbp29XBGtah`M&{sxa!SE=Z53gk-VbFpqRd+T2+D>&mN(1i@?huZr zpHT5nvc+Yo2gGD&^ONctP@6og246vUTr9-1S1+3w!0wJr<`)2cVO1MzbXD zG9X|#nqUlJs)ZH02E?NctgoaK=w4W#(a6heQ`nIBVR6I#d;84!H}nlKul)7^ZAb(N z;$=e%Ni`Nbo)R_Kjh~nXL24uo^~4&BW<}ysAdeD*D{0|$R2I#K4TNnV*6)(aN;Mak zT=d`DP7kV7zQjWoOd&K8I|VkSvX)jHM8o1+C34Y$x* zAT8PB#Wi9=i!KWb%g+3z5p$VK{UQN@k!X#tkb8XMlpJcYFR)VVHraFR zsymN)hoFwwqyQr21D4Rl#HwQ(3o}vaY(;|`xUUM{J~#>=w40quH}5o4Eu+0pe^vxR zC<0GkQAPvBQx-hn;UejHmvf!i?&~|3KpZ-#kU+EqnZnvdya-5f#@CZ)LQwTU10*6_ zv%;ch>xe4aWUQ>M!5N#_G3NK!<)qmTsV;qACsIY8&IKdP{tFLMf7@iMfORs2W082Y zHWpUFkVS0FkM|0-3>!osdpuMUMf7+lYKB z&Ykb^?6Q76@n0Gkrs(+i2Iv7N03=){ijO$f^Nhv8_fdIpeh4OGqV~suc@i{QiHz$Y($-xq1bnL~XLu;3{>S7&SR>Y6x0t>sO ztgNb~WfC6R(PP7pK4Q9*?m51|@`mqDg~tL+3TFog2d7|4E*-Hm#u@a!{^CGeRRh}6 zm6er0^VHpGHJ1%U6D?NlAqkPiuFYxS=*cr2px>V04bmZx4@rXnZQ7hHimn@C*U)xb zvV~kKgJ(n?7YJg5Vj6)CIwP>0aZh@kJM389U16zEgQc$l=16V{(o!KQDf!&rU!3a0 zDv!eb1APa-LUeFQhYWw}3jJppTU*;d-d?n><5?kh>+{8g7lwJE(#N0yKFE~IUkaA8 zI&_3jm!Il<$xz{KGf*RnhdSV>Mv1v{!NS^Fb#Xu7yb2Jok3VVF_8yK3NuRw=!#q$5 zhM>Ks9oq06(IgEH?U9aL4G5w^BeWhD2*yJA+7nCt!r$MxDe;=##Sx2JG);2#b#>p& z%zx(}*|)D?BICB91VAw(tp~EVP^(#6X2_>c=Vf=6XKJC9V>UnL?c1~DnJof7oyBEk zbwFn-nwrCNR{U>D%FF9@R_4_(C66IRL-~1MY$_=G`g92Lrmx~$Fk&nMqorA6x!(u) zUks7^3Dt$(;XT80GBU&ha(8kLjwCdo=vWGyA4EwOg%-^rv_Pw3z=@7R`cSayYth$P zj@UOiX!J(Vp=NRk?J~+}bc02SI1_-riLGTXdKZOYA}4<*%;yg;?tqsqjGX0uwF7l> zZu3eAh6CJgb6%QU{{6Om@cu#0KFN_@nPw-=$Mpq5mDOoR5MKTXp#jlp4!A)T00Xf{ zJ41-!t2M*C5f-{IxLRH@w5*ORuZ3{{;(knG;xRl7?tNmSJRxi0g3!Be)ae96#uS{g z1d8B!DO0cAAQO}9@bjlZ?nDU(X=@;ll)xP<{b0zKM_)f@mZGE1%lW-G+Z2$Xv1>Ml zVugs|iABGvE^OXK+LSGq=50x`gXQ!=*kK)|2x*>;-FU*4?{}NKKQky!YV2ZYToBT_*!Ocba9iwtW}2i=3>4_oIK8^+*ff4VWdx zq@^Ei-LXRt&ejSv0r7F$th>RB7@DwYJHUV|3eNu>D3SjjSF^G~oP}aAW&ep6H${;U MmAx2${_36o3sZS9`~Uy| literal 0 HcmV?d00001 diff --git a/img/hw2_pow2.png b/img/hw2_pow2.png new file mode 100644 index 0000000000000000000000000000000000000000..55b5a2eac8a2d0a724d168ed4985ecf7b6a1ff5d GIT binary patch literal 21984 zcmeIa2T)e&wk`UD*kS?$C@6>lP*6ZXa#B$g6v-e+Fp-mF$rx!BB!gs-Bv~bA6akT_ zNcMA|8R z`GO*eMD9i+k$tE93qRre+SQLgD6Gy)D^ubhC(65j>Wu<|2MNQQTd zBeV3X#xwK>1&$|(PO??%d2o}HNN&1EXYt32BP9Iv`VO*PBvR>ZN;mvy zpCpNbM0!g{ei$Dw-$lZwPdz13l1MkX|Ihl$y?ejDpKCYazj*1=ej7Fc)9eq$k4KA-wZLG`fJ$|j}!KN){`Po^nU_b84~Iwe=PEou;?9Bm+3LG}hPGjlRBqvX4VU`7>>w-fq$@ zd!fyx-rl=RdkWRN^KDM9zP`z}d-v{+Li>!PM~`l^DX5_HWDru6e7Zv+P9}g^dOw$T z)2C;9RT33hWFy73bX?c6p6;N3ZP)9p#-WkfQ2t`Sm1b|r`aIW8Mqy>8glo9|37Y-; z?Ud%T%zL$EL(Y7)su#0U8ap@X!9ChtkfW)ond!7-Nu1uaBeN=dDoG`Iu31fIq{nfp zc_`8~O-_920-sS+!`fnRdX?Yt{gwl{Y=7mR*VGL2xjR@FexoU_E0b)~U$0~W*tIfr za0{}b{HEIqBW{#0%?`cDf9&KF5@N)umG3`sXFsRrX?lA4Cz=Y=v-7Pw2c-vddR;Pd z;^N|h{f^7&jjSvUMdF8jUJ`Y-T74Csy!@fH7cO25URzlV9c@ky#VLX%-Kn~jJzwnO z;Hj&t8~pX7Odu=5mtCSK-F1B_!RK!L)nLoo;^WuCJ}(qCrrr?f8yPY5_46~&KSV<% zMdG;ITkP!c>%$gWI=T~B^X4?I8@&2;g@cwWi&L#p=N>7?NbR(Da7c1mni>5Xi1!wGY9^R?C4`q8FDKPM-rrebGD>2GOT1x-bcb}M6Xnt7I^ zEvcdF_yj{_GqS%iOPeWakpc`vbrSLY=q2L=b@-P}l724Bw`x1?0rM5ZrhElyW5 z8P5%WW!EV#?CR>ublq^lomcB+7&a%V91y+#mYkfte`F-l-``&}b)zfKid`!|bA74a zRWEVs`-exCwze%ziAq-8UhNtB$>$#ZHM6h~=;h^Q)SNVMI>~1K$oHBd{)7=3oKIn8 zVParx?4+EW+@C9GO{>j{I5}q2mHQ4)-_YC&hceCS4vaJRW0hr=zVwozYNqW4SMeLy zjg?+v5zg3-Usp+1C8AdG=FO<1$Bs>;2VX^)1r!t%*p9c|GOzypSsrJhrJ;%a{rkS7 zqt_uZVnr8w9J%CH{MA}#?OCL&gSgZ(^lEWWY*-eX@iv7}0rPB$^=ht?&w3dnv-J`= zITLN~B=P)g##(Hyw|3k0ras+yP&l)De1-4$K>;&m;zs)Vj^Rf5%zHF-oaY19rl0LS zo^<}nHs4tNNavOxyr)HwRBC4q_;Z$sb86;BVSf*3P(Sy1eJ$z)0})2kv$MSOqs_cb z&T~HMSw;c$Vh))WmXQijcS~K_S{j4)oO2)b2iy zaAow;tY^+N&5gG=rd)sbu_jcYUO8R6`}gv4$fhk+Z7VG)>W#z?oK~kRS#hjW+J$Yz z@ltN&)b$JnsA+R!%kDTN+UB%05TsY-%M!1ip;u8|UA;O}&9&fSHB`sv``(22%$d26 zV{RgLlV|Yc`RuwZ+}n}w{`{L~rT?6{n%PaM`nG-Ru{;?PRZB z^-#|?3AUdu-(QQXvRYfwoH9;FP8lp-ndC*N218nJU%PB4vt9(3AL{~)Px0c>6ke=JIhXVWKC2%W4S8yGJ>(x!7Nt-U(uda zeKpx#a8O@gA3+#>$vA^;e~9GZGurhi`Cd+3l^l4`2awMcPO5AMP8{W@o^UAxwvHk4K{$%8^>pTwJ< zHZ(ixCnwJBdo|%&c+Gp=>Y9g}w_h!Ly3;c`Il2Aqtw%*VmoNWqKmM+Ad^WvkA!Pd3 zuhYKcE3PBoSo+4s#(ov{5_^Q3o10jpGbU{fr?3X6Ze-V3k2Y1cneeYTPJQ+{Mugi! z>u^BigVq?+&TQ7hhf8DQ;(Z__>WzB zS!2^X^HU?es3rA=zB(&!?%Y)G#?w~U@1XCpV-Iytoq4a^EA_<0Q)qBL8D zoPOU^LQ&CN(PSj0+pe$LH^xfu-MeScZK^|}2IjMiivnT}bN7ZrBO=TWoH4i=a>mei zj6cV$+lX0<_whlirCqn=<-HA?-uU3&OY}Ah+j7h!r|0MSiX7&7kfym26oXIpYk$t3 zLM}893k$2svl^Pw?X2iun`NKnjYnuwt+*_VtM8?w8~)|u^!taqEaH1&7P#y*&=0rn z7k_Rhm*3Ic6|bgBHhM0<-_pH$zhp2LaE4q>YKu=&`px(!iAFn>yMmxR|zN8%? zY-1GB;+Tb0U4NN}SFa{FdO+cfAcYB8RaKQnfo*q8KFKnI8lrst`m2v+WsD2SNTK%m+pHU+&fy$> z3zOZ(=Aj~XX$J(%Qr^BjEp6{QnPxpzv?yGomaeUZXZAItUOd^Z$NrMz!g$q~l3yyT zY{&^=VGXX5)%V#Z?LY6m5f*4&=_qiJNSnW|-Kvviq+&DGqlL8K%Pr+QHMa>ycX5j@NziCG-3X>snDwXSyhsO?^ zk#83zR)0yRCD5({_W8z^mX>PfxQ;fl*-Uh(RM2r;e|z@YwWZlCg(3$VWM@YWBu_k& zG}TlMp)me?aiyhab?@FiapcI6%6a-b3=!^`nVF)^7OC~gHTADt)|Q3F3vzODCW{ue zP21mJ1j_WbUV0rMvHn}q!oq@B(hARm&2d-uUzCzELUr`zWO{FBWu;Ud$iXsDzrrU1 zl+=o?V$9t*)uslo`S=U)Jtqw|7OTj=yjU)M6HfrnJkb49PXD*r+(s2G9Yu^ zSj+kL?b}?-4f85wrdy*OS;@=;%W-Fnnof8!Ng$->zdfftnQ1eoy7sG#+6ar5tw!p7SrMrz$j|?RxSgDwRPXTf^P8Bhe972|)a9=m2@rSJBTrB8{&2K=|KOmW z#FSZmgxK;AccwE24Ts%LFSoi(@&M$1X>DzduC1?^d-v|$Hfrit^aTm|He+a(EJIG; zIv-Ki8L;ov^(bKbVPOA1=w@mveSK|ZX!!}J)`l?`Gc$9gjqYOGe9A^*Ld3UX=aq4t ze!yvE1W%g%fLV#l+8O;i2BNXTqNbB?+GH~nZr9Oj#Q}8H{3YNd0Nc?g?(UtPyc>0> zn>v9dv;J&KcRi}+ollJ8W$6n2<^Xhf<7Z`4lCE2U`)A%2GEMjCx%sThSX=QGUk zn{~ZGlb5JfU_06n{b;SQhR?X=&BYh{W`6zh_LrDy9S;+C)~I;JFjj2YYu1E5CE0N@ z?-HYkZPW2cC*O&@2PcYU&{Lndk3IHoL#Oxct+MwMN@&p-f+7XXBbFDZ($#dFo1@pT zPXKPM1J~P~6OS`93x-|SM(^f~Cd8MsSIr2hB1F*QwW5;Je4WVD8(ol+p-=P=Ii_Ot){IGJHTW!9txlHvNfZ)({%uo z*0y(dY*v3;q-y5zRCc5-W(9Tbm$&uOs$cHRGEP8(DOy|T1Q>5YI|Gzj$dF?f2Y7CX z$QE{8FKkKIN#DAc^^N1=D_q!WN9Z#3ckAWSoYAZ&tEqo2s566>w5b z6=+X6-`bGqgylr1A7`6&Cvn`!Hux4RyQ+jEmlQ(gYNZd(CqeQz0 zhA99g8LPQO2`vID&sw^GbA)BR@l~7ldR;sn))tM{Ds(y6k)mRxF z;5T_SA;jk;I&(R#lL`R9^PoU>t3r};qH#~)VKK4p$B!TP=hha6ZKB(~+h<})F)w`6 z{hyzo3jW1TD zUe^uhV^vopM1?wSh|=rNsnsF6{<}{=xeVQ=rKtHB`ZrcUDEvaWbBIIYZiE0ol7SV0 zL57C(9TvueO?!_>K7G|`R=}ZS-tE^~pFB5bt%SxEkaDtN3QZwV*E0>ie!NvgtQOmi z?3BE|mm&;xhUiURkDy=mxtnd;sb|jhdC?d-=jy$CZ}aZ`cv!@N=h~F4HWgn-wz|-1 z?ukZj1k06ofBJUB*tzMClu;Ma8AxL3m6KId=YIEjC8L2Cc3enVnCeXjI36yTDz3I@ z2Sh%JQi+B`5%HLHr;2%sNpDTE!ZJSV+>yO2V#JN!KnKb9y?f z*ZW=Fy1cIqJn!=N!3ufk!|u7X=8i>tXXF7 z!@J9KBQjU6ShcyJ%%YUo)~?mnqR6N0J|<-VxU$eXwS~zkiYW7_JFVy`7?;g~C;-^l zyd^zamkTW|Ex(PWm!!ELa=6@GVAm?Ku_PB48*2cVD2z@`+i`-={SVQGaQud5fk>_H z3qZO_NZV97ZdG23#@BG4P4DhpBG3rJ(9*`H8NKRU@5Z`Xr1L`iPO(UW4ZzCMFfgPH zML2R+rxp<8OoC+7iNBr;KY0D?Q+avtheumN(K;U7zkfeA@=de}X4)=Gy6)onMonW# z>VrO)4skzt@Svixu`$!6{hLYK+YiXOtepz4@Tl56=(s;pP*Bi;1ZzZ6&2V0^bu2*$ z27}lPLB!K@adGucOqh!@9}zphvN5tMG@O{dU~4Mcf6YB6J->4&?PGE`#&}ly?x*4s8NYn zJp##e&z?PL>gn7SqKjQt3pcL!TdU}4M-+sOt|J9s1>%&&9`tZ`cPID_aG2a*zka1! z+p~9XZH$+=5s-ifTICTCc0xE~8Rbn&%lGuc)^8GZN{WfAMfHqFo2!Ri+FJN*V!n7G z&5q)9`dG=g(g&Ml6BXlXB3;*mjVFS|fl~L<(;E^;#I|KqOA8wTN4f+Q{Jtqti2Rkh z)wRX|aCNfid^~{nfiJlSs)GbTdsZ*x<#zm@?voAX(Pc2}30=ysH#~6q=Glk^k>el- zcnR9V2L0;HA`@x?@x>cCW?4BCy(JRW6aF)j+7jtJ$VfF^_Cw(n-QC^y?d^jXCOSh= zJ;kq-ARZ5*N9-FI&;?YOmgdDFq#pWbT9cN9-!1qhD0@OdzR|dSI-{-9U7o-b(n8fo8?<2`_6uolpBve zOXksEI~BnJV3llYZGVkxvu#>Bp|qgi>*j zlI}mzFz(U*lKuSoa|3RJ1ssM8#X*wWjmO+l`t=dG(*ZQXU*EkwTC~$fJebco5V#)fa%xS$yeCE6d3iW4Nc)?_`ZP6%R=#qP<08?W zq%Vo2s3Cj$Ff$5SKJFE#AbsEG+9Em`fYbwnAE7SusW$aOn5B^j@?jL5_)q zg=Hkvy!c&??mB4^{9xdaDKiU86#)ISXU_=D#%#BfYf$Nnv)>vU`CQkX!X;d_!4_3# z*CV$!Cn_Cx&jyM!Mp}KMaFj$k>%+d=-Hl}goWClv7qImyFvG+(5>cCNjs%T=7&`EO z{@&i+WBiJWivGU7wv_-ycTpTP>cn9R&GaR~`SHfkgx5s_r&M8z&`N|O9xnX9D25blZ} z_a*{D_>AL0AC6%MXXm79WXaTm(zFRj{<5^Ps;oJJWJNMp|)WeT`t+LEuRHMmUVJ zfd|k~iodFWev9al4e+=-axLx@H~aheMCaxT5?6`#3dh$yv&H_lhk}t&(zP%_;#bW` zpZNmpn@Kc(@7YcAVibu-2g)WcuI=LDl7u{_nx>gZRAXNh_3>Yup$}QrMoQ>#75;iy z;l-2=g3b`Qpw))D)EG>b3aURr&5w)(n~y!=yMO=w=koH4?o^D5I3s|=i+pC%*{zDV zr#4-uvn=qEN5cI1^QUs03~h0th2;I6WM@Bo7k6VB#{MLc`gi`jPN;O0lbo&U8?Q%p zdHA>KjH+#O7HWR%>z1}Zjc((Zemlcyqx*#hfJO^zlLUOHZ9hQED0*KWjQQ+P zy&71Sfyv1@L!a#DERY*1lO=c5&|ZXt5*Q>R$Do>QP7TA(w>k=sqVN9(L(40cU;(_`9x+=^OxQ z4#qeR+hlCAV5$XJSL^0Hi6rw@tUB(=T3l=iL#N+QuAIH(+bPK2(#h1H4#~Xj^}awt zPOmeT*3m!0XT$E75YVJv&8z5E9ak;D6?vrpruVv6#2`5=h=ay%1<{d=)M5<4GBZ2d z2;kv8m4%b!dRQ9{WGI^T-alEXJ?ZXr5jrd$^%tHQJCDr68$I1Z*p6&n$-Q6l3Lb6 zGw5|tgL9Ta0#!xJ*?bz!aJRJDAkP@ius z<1vi^X?cNTP6h6{;yYP5dyS|Lc|#G6AeIw>N7;mhg?q;+Nd6>5UGV^?&M%)vfe7=w z>NW3lI54LYmETr+hebhN;&GP)JJ&+TzQ3@ z`fThySIG8}Li3g%bM&j5Nw9{4s=zLXYE>hQ?g;OO?e7^$rKmHw9!mvKt z6+RVOJ+acsE=Zy2fiEjm3V&GLLrIFdn{1V%oP4l^Lh)}(xAL$IKiS%(Zq+7T5=s7T zSXdl;>ht_<)rUz(>)BDM2ljE1NZ(t^F2Vxv-|+(+?IqMqD2W%f;Gy_TMz(7y4Ns8h z{9oknAd$Q`nNetU>Bjb2N_awCID(LS&AAEr#BF!!Pm_8}Lx)#$x9kk3j|FXrh>cr1 zk{NYorxNTJv>qu4<=d#JTJUfuyyQapWx?C|0pua6&41b}2R7eop)y^RqR?(in-EIT z=23ON;x`Q`<9dNCeF?ly060MutEO}viE#8=XBYR9NXbV@`_vo*uWVnrBJC=aeY{-9 zjO+U@aLr7FKng(COrU!JcVmJlWX9eoS{e*po^sttCdxVbjc`IDF=-|6D<9~GNT-=+ zk}K%BlCXh^Tm=k4@C6`ivw*3=qbLFlDNXehZMSA3BfVuOziHv`!ytcth0-7anUn&#Qnuzd|T$fLk%3Pb@f(kVu=|NY@z{JN#|>>mT^| zzSN#nI}3n6b((3O98v+PMZv>XCN+ zNEP%DHlQBFewA)osD!a16I=ixp2x{d`%;jiqPWSgGpHDGE(QACt&V;qJHz3cr*j14 zK*+!N-4R0R2eeHAl|B?w-3_u-Mn>iv6ved9pDzb+>GZ6(fdYge#Wp|DsfI?r5iCtK z(S85={y|2LD|eRRU(4U$f669!rX)C7^83bML7^)1;-(#-U(uWq74XTECnHE{3d+h& zd_lAG^D0oA=Wv?QX!k>w_fvo#me*IN0^m~tcsqIO)KxjLdH>bb)$te)~tE~e581Q%e_R{uJ^(~E^U9cF(t|S zqFHaR>W8grPPwTLEFrEL~tX6^MHv{5Fp0 zA!j2;J9E;R4X1yWC*8RAT^s?}-k5Cxf6oFx>&3yO#q^_3qv^`0RZo zANk8>xFMRsSOi2jSc(`P-7mx0on}fvk~Z)&Ne5BSE)0K_^aU@(99kP_8X9}b-R*x3 znxxyA$a^yMz4C1**n!|TZ{CbP*hScWCU$KabeYkozjr$f2J82O%>VxVCJ5TEKZJZ@ zU03^A!Wjnxd+YhiDu`zk@Ub=(RkOiPfvh@^m@iHHVaY4moiiI-T)Mf0YI-2h1bxOfKHc#0l`_e?2cj3WkFR zlW&zhO$5mWim4GIXFB+9L#!W;@x?+&N2mAHxv$;bMN!p7@rcl(>d1jX*Mv1XTL+NsJ_H0KLB(wy1` z+C%|BWDKD@!tAzSlWp93?M{WqyL$bX9pNH&*Fa7*!WTxnckeL?i4uk}qJ@C~JBK{d zd1;1V=CQ|4va(8r3)?8d7sOKa!`sx>RE;1+2?&c5+yI32W+rSh1Vxd(fs3Kty*rvv zIC=16{rF$DuZ<^;-});M0$Sy866o? zSseUFZwSo^8Ui6?5QHT@*q@x;GNAT1n4*$kvJ*Vj`TFAaiOw83=#&fDQl1BN3&K%B zMu8u|as^2KJ_w!&Q8eMlOxSZ^0}F!BsHht_4+Qwa?5f!(;!vr>V99 z1z;Yz19-M-<~m50wO&q6PAV_JrWN}D5{V{j&wi38_W?d5e+aLMZ{NOUSRudzI5wys z5hx{R(2o;DEELrk;BlV`9yT^M8ENTv^~YIQ1{~LXX}$;Nob_vUX+1eBS~}90kc>T! zWI_NTke$}cbB1E`O-jD8!ZxD@=;5iV%{nrriMS05Pj@z022Y@dC7di?F>?kpv~ZIi zDF>pjdoc_o8gFktfOH6tSE!t$!_u#mfBKMQI5Or!){gyqDewN>SxM-y{xl@FkIF}> z$aD;U|M~$ZP2?Ofz(7Yjh#dkaHAkJ>gdmXt(ba^AO#`KTvQqaS`dL2`5OcP`Nm}v3 z&BqT#AKmri=&Uq-&(_9!efNXi2)>$G(h%H$h(VcrhHZ9}U4{}Ha=ubthf?7h8YK`K z7`rqAEy6)$8~C-Li}30=zz@|7vgg#T85wG^Iqxrl9LY#TMliuVfAK=e#3Y5_57uCS z6evuGs_U@Ou0LPHTThU0-@wTdpo`E9+ZPD-5Sb+oQ4G9nx(sZPY1GWV^N{H8d~z+g zPO@w#G+3CK3As~1Q-VYiczt1WDd3tSq3>a{K>&C%rqNPWB@HdA1&6U5C(n_NR*=Xf^6w5(+0t*oN$eCGX=#Pgr6^`*HaG7 zTzR5N$0BMwO<$Ca5N-cK?p;VHWEl&)K`aVS+6qlpPXH^+}B z6qbbd-0$0&s$HZJ9UWc%GjkKkEvxOSw(Fi~I^K%ms^gJONNq@betk!}j8kKfM~RRI zB=$?t{T~?V)2SoOLPF}GWZ|CtvQtOB5LJ!H&uGXVianY)@zZaPR&+n8aomh?Y5D*; z>25&hVvdUc-&Q1dxB>vfm35q^pP*EN7&|;N6J1l|w~9=X4u1Y5JG+eU8N+YPe5M`B z=%VEn6@7c0W@RUOiU{ZFsrPHS8tyWTH1r45U%PAt9Y#|G`Uiv=hx9^OKroWCu>^xX ztVZsV)`SXl$4e~!@nedmrKORVB9jexn9Wd&KC+9*g@uPT3+7rV1Octdc<%v@c|$XI z=orX3Z|#2o#P+@8zqWg8D#5{K!XFC7N2w_eEUVL5(uI9!vYR1^ptX}$ z#G*jjm?PRbw>@zeR`*<%*8A3Ib8CevE?h8Lk&JA{q)DEC=;wygJy@N5U%a^T=epj1 z01*_JX%w)KGZj?^V?jFi9qaq0qzUuhbJEGGv~B!bP!QVMuGd1I1FpmWwoo7P|C5^0 z6Zs}2Bp~snVIOq_g+et)m&J$bLr6Vfjd&{ViagwZ=ujG5lftKNAjtCa@)A*I4shRo z99wTmS=61F^ZGgCoS%edi#^UiKw~JObivqoiy=f5dnH*G=b*wZ#S9 zMTm(Y7L9-gE$jMD47|ShVlHxH^%AjDP?H0^y`#`0DEMaHt=hw9&~TRU)56Q`Tkp7y zbb?cLT|nDUbzH&e_WL4^PxV@$EEIy?dLKYQ$ttE*sqOh?&pM}s0duG<{@Yh> zWnsxhunhz-tV%O;bIPco%p*v%e#}KPUrrL)nAvc=?H%EsfG2vQL@5-|3XRQmSkdfz zdVZ9-(?W8B3Qv%QkU^FISZ2|Q!;eeYZ;SSk&Q`q#kLTdjb4_c_Gsyc@O6AZ*)42OF z$_Ja>BumR1o=k}N2YP#3ZMlO#qPBP1(P%695+c%u-Q~{%0)0Sq6Q*!Y<){5RE{$zn z0vp;JNO49$!5aD2iDtz6*@Fb;htf;?xg0-y+j*&&3$1I7t+xw@*xbdge3{ zq?1-l?1Fk{N>UNHvt6Lv*#Poukv;2Ohv$~!p~I*YThNh3E< zIg5&lTEnI?2H9O18-yQpJ}|#fuQ3^khO4rEkYU{{mTBgt6~-&-rFN>Y3tl&@e5vbt zD5^GI@!UD=h_`36b_qRp5rmu9F)__Cm#ER?NCBeS<(_4R?hMF6 z$hIJH6NyEFumc{zw%$@ehEoYrF$}4Uhpt?^ri$qS+{^6}nT7n;5~F~ZMF$4% zox7a){pMydfwZ$`WRHHbNN7uve4~qvQ6e|FArSjVM>U#Q$Vf7DJJ=*;}F}+-*4}xd6L^I7~ZouDnWW9{C|tx(A*DWF23_z;STBSgTthKr$cL5 zA@t_u=F-@(;qrWQDRJ4Qmj87s$A|y74*R@9b&7$04OL_5_wN%B-wBf&+Ozb$uAk*E zRDcVJqziz8-E4S7c{`ZKOqmNL4^Pi#@FN6_A*wa}mV~5T7by|h#{;LlD)dEys9S>< z1KA>yXb!B$N;5QqekD^V#i_*f{y%Wgjg6b^9-b;Om<03W@qb3l20A3r3iF3@RL{YAL91Af z$9yqHSz*7o4t7`iUpo)L-+eA02F%&QF6>im2jfU;O3D*|&)}2KJ@@P-O+noxT*s|N zniznAs-{JvMX1rpu?+6|<(Uu56%F`dcoB?&TJjcTrKJ^sV#|<$?j^r7A??_@^(*{R z$)FgFVZl#=01RRx9aU;5_o}k;A97O3z`)>$>JH>;36lysrKXGh0=Rs*)ChlFYnCQR zPRu@G5N*bUNe;)yF8ot>QOIRr*cbG|pdyXVKZPV?ZZSvq5S-c6CE~Oe<`@F{mq5Ob``wps{a}W+Y$oR9YDIMz7dY_$c-S)j z)9OB$AaZTu^ka=djR;%(cuX$3iDoz55&h#suQ`)H9mV!?JDT;ceulB&UQN%we)`yV zvK2{^aa0$tt5=mDxrsz{w}n9?Mu9tq;bSB-<3BW>VQnCWsXrC+sMs;BbW=xJ zDc0&c!KO;tvI-7T4!43-f9@uc!8*`p@1!5-Q*x4?o<;L=LtUT9_IK>YhyR@1;^#*o zHuu6~fBv@nJH2=MnztR{PmpH@H%rq`&m3yfE^g*0`yt}Pb4rgB{a<9ni7C_*d+M&$ z9q7E9!mPXXLGWxnzkMK~sr~sC48)^(rG{Q{Z^N<~iS=8r% zSG!Bcvg~K|2p>pvDsp>Av2y{W8s!TrM4Bjsf8&19zZXWxFi_kS;iOOx#Gd_+-JDno zeU|juJdlyhA0+nRX>*UNY%!&OOA!xRF>vhNd+H$R(W6J`kG?n9!i)?gB$Lx^dn3Er zvkbVDd!mdGZQSow-Y4BpzMu>@2>Vg@0_~;QM0TF%EE>M&< zH7OI^G1zlUgDId|aJT+_mhk)$;{b%+mKdnP7&iN`B7xvEp{{_jRd#U5|6{0nL>Q_X zuTDH8k!V?o_X2o(Pw;@ABz*gr;!6Xdy$VK<7#l|o4(hWdj932t{r{9ZVCx#B3Wh;X zZuJfxf@K2r_?9r~;x-L2T|j6{^O#T>$X$Q)<^+hN_JoFp2B%aeJe|}2f0M35n}IJ$ zpMFBbW%Yib!iO+HAt6G>$a_h8`EUD8Kt%wNs&HaASwnNP%FcuQP3RUGJ5aw&q)Zk*L zrKM$&CI-tw_)U{QN1s6BO$>SxdlvHWz5z25X}zSVfcj;Uc$47z_!Vt#^{qz=PH_ca z=h*d-^1DSpqUdbSt z<#8&JGBPr325aaE!V{NEC_T9O0C?r3*}1rax?Khy4`F0^zAvw@*7<=%vfjf`Qt+A24QYmwar2+|GMpg( zvQLr>-+D{8?J&MuE{|$UIu&2twFRHB)_&TXzAU$pn3%5twT)IpREyueZ2Y>`KH~eY zzU0^>U*lp|R?^kIS2HsGS|r}>y_cvwX1>s4vR#*s#0Au+k) z)ElxgEm;aUiyPaH5}kzov@)j7Z0&LH8eeSF{k_H*mNFJYLhVs%a($oP99*(*hoo#k zK+?utoysTgDT!-5eNo9~UN+pl;h%$GHlU};{+~N~b=3BLe%K+eyBS$xAN2eh`xM51 z&Ar^IM@DlNubJ2uT6ScyODO1&rV2Tim~DM{*&4~5?9U-(om2xIxK8>uPMC2|S{F1s zs_$MTE4&Dq80S;LFMqns9V^}%yXnt4M@*>P^F3=$i#*l+Mcn9rJhi9ff8J(6oQ(7$ znz)PWJN}CY{onjbLOWI^+)v`>o4K~uq@<##*jxkMSmw{Sa)KGRpQXe)x*g0OkbknT z(CTutr@O9I%NK*^O7(v~&^UcWwy)74n34Ld$e&NW$EPd>iW5&PvX)Eze%P1gIsK(u zP@$SHU=QVEp)`cT;~J*ZXG(sXmd|N(6lBDgTiNP+-gZcDS^b$aKuyQWLa}SxwwteJ zx2$&D$Wsd~X-g>za2%liUHg)v^y7k)Lh9MB1JY0L7K}PbfL^8F_9^vi1Xq%5)t19& z3O6%UWHa{0m&d)Z$zQj=SHn2gGVamtaCnJquU~Y=%$8O5WOgMR=PZYcg{HT$w=CNj z^(xOFSJY5Qda-+mrTy-9x&!w=$}b9!gm%W3Ih*o~^J{qsrhj{`xc0s7Qq9Zl-w&K{ zyJgflZTh9xhf9*9S1K_nU2*-T6GxFb8_O<%yZDX9`vm!|m)IX|8BbqvJTulKWO9X4 zJ;!Pbvo!rBa))@qs81ay>z!$juZ^55kXh;Eszo&Zcur3Dbd{`RUl3>Dk(gSww)dS` zvDVKfryM2p&3E~lR^}Miy7rH#m9HmLwI$5yP!CGi?^ht-?R&(+=Sv%XLygS?)q0jD z8Q#{-bE7ndpTm7vV>j($-*%G@_uDwK+}LXW#Pm(3YuuNlFK0gTek_a{roriY9Q$+! z(v6+h49=(em(-=V9@{^>t~33ReBz4SfwZCQ^#taTQIYj_md(s!Y7FwrCDc>H1|%~Q z6Zuxj=K1C?g5OrR6Rx%b$2i@!^jeB(#Fos2k>qsqQBl%h6zOR+*>UnCjBmGu8948xx|fCF_es&~p=j$v zg{j?yutFzx{X97&KlGtv@~U)6;_OY+cV&$ObN4lRH~r!XmA&S5>0McSgxkHAxNYCj zLcs$38UP5&&=_Vs3MW@#)T@_)0~N3iT0A(7Whxm5=ekdiar_qB8;X7*w>RatEd6wg zjW;9f4s2@jdDkG_&OntnRF^jwHSZb4QiV)qLM1Lw@i+q+w>3>Gxv)^QXztsUKO{yO z)4Dv|(_Sjt+C77-n2)*K-roMDs_H9LGN`99n5DnUdoY09{2MQuKTJr(ao$-&$bMh8XFB8<(DeFs%?=C?<|oXHgiC(C6^hl+BfdrxZ=* z7(hAIh~2W2c%CL_b5qj^aq;znjom>MRO>mTfdixc8`dRfW+v3yYd5rtTz`3*g%)cr z6l_eyuW4J$u5;ObnI5EAx^`jAI{1q0Pt`z@?ZW%S8!{SEz;1Zx>*^Z9LuI-iN<22G z83x>t%>_}&y5EOI3u_)sKIhDJS>kuR=ca6l`=#qC{Hl+x);z0=Zjv}#CG}9$xmZ1j1fBE|;J%3{8%9prJ53UStxOTsaaj)YG@!v&i_?lnC=jMx4_V8}mX$?>3 zpe1;xP;oy*Leq4~DQ)$p>6I-p3+o6Q`9{_m;H2H%RXZSOB!2UFxm^KE36s_pMa|3} z>bt{3p7u$~)znlfN$xH^uM!qF(aSvVt0*Gfq}BSl<0i;12C7TR8FnfvDyeldT7%Q~ z%?yf}YlC?&0Ze_U6px$=^e^@Brx>X8{JK5sTl&!+e>F9;Ma9HJ8f~v)9$DP+_xYIM z_O%!vnYX-XY%Kfq`N5|r%hp|GzNxE>*arOd*Jp37BF(QRa-^F_v|f{>q@>uz#Izt} zJIsB(^r#R20Z^nEx}k>=8+J}kGYiJ=pg$p%4jZpW0Mo&K-d*^;_50h-xtYhxYfkf< zX$5w){%y|qcJZ&jP8=>!y>>aE%w-}Ypj1~aJAsG(y|LV3zTaH~9K*WV$4PJfITc7O zY!VVW7@0gE?vzWkKg0$DplPjlS>ToahEcv|IGw&9e+b4k4egKtcpd_%5vB{sf*3u~ z(p#rLhf-!Zpj0+DX1A{9Xq3kA)-mt(JTA#oZcO*Kur+^gPXxOWCq<-@SLGXA$OX4dD52 zDH72E(}qQJHKRTitNOl)Q;=PQw|3_h@WvMqu{3bF}aA%#33$i{}Hd#jVTBgx@Y zJ-hOs;;R3u+&AhLZ*SWXRC2L?Xf`+uue#U ztSrQ$Ugha0wQnug)?Si7a3fTEbm;9S**6v(ox~%T7bcQncD}xFC0X)=@RO%D+jfr8 z9B5%XlcHoGY!tquOy1NqwNN{K+}2ooXC-~Xd-@kD^p~3o- zu=;#WuWn095irg^seh1TC0@3%?-e8Ygq0ve{#5Zwzx7Po+WgyV(YM0Sm;KMvCf@mV z1hA3x;s?>K{wD}&hmx$UEWwPS2cg-!H%4SKR}wA?<-@y9i-aT+I2Yr;pU39mt}}$S zfg?g-hvfEsr@ULoRG^r_OfQZJH+|MPzSjAcq<8NWh%q_%0>P9C*cWMIx)K6HI1l$b zp^nb@;t9ARf?x!K6>SUa4(jNXMs#d^cry~xDK-W1hAI_%`@F4&wT(T(&CShh0s?9f zr4?`{!dHYL#1Q`$Q6vRI|HXI%nxgAdb*mrdhlRTNEMh zX{YBr@_iDt&@#fb@!Oq;mzVD*gmLthki;8czy+D!{Xx36xks1~$BD^C^j`$+NcZaeO6)lGM}^E*-MJ z;8N22sbA~w2XE&v<#`a@rpb$UY)bKTgt1im*+(ztJ0P7uR6aOc`s==E1w@`le_rIJ z$ry5nYoRC?FcTh;&c{e=rsJX+ zF^q+QQ$F5*t`tLJhcIU=4cA>qo>c-d|A(_*p%(q+);@jJCp0t}k~0`et%*8aSC?6N zto^xj5A^CIQsJ7@^gRRD1u>M3-A+?#j6n4b(Ggl0Z9XZet)kLAnsPXVkDq_^UFD(1 zcXul1V79xrFvU*1P6p8i;j!fVtKO+rlp?^^Si;9U_+2K9{pm{CyKjnZ35#l_mTj@287+{Ny&f{-;} z@{pCGbvbbGU~^mB=VGl|{TWQX5ihGDtYk9M(uUJcV4+}#*BGydQ;D$U6OoFI)r4t; zxzOHKDeQe>n-Y#$BP2P(msa~yFA0%Hyf)xNB}NY#aUq16g&0`l;*!I=`3QD7u3$3% z3jSM>@U}V4Nbgk&?Q<|5n0J`>i(I{cO}SjHvwpOsm#Hv~^~jNPpj7qBC+|&|Dyi-7 z!``qLJp@v=Bqk;#smKOiU3pvE><}wNHeM8jmvqQ7+8k(Mlcxy_XH0bT2kdx)2|tq9{AY%l zPHHywCgJcd>zk%S&8om_A3aLeCd>z)@>k8bjN%$i#gA9D;VbQ5MU)*oo0?M`4!)9a z8No|&1c;HQWAH!I@!b9U9Y&63$wOm{Y?9u*IgNo6`m(Y41K>8LFp%L%m~JU4$Or$L zy4l*XQ3&VJ2O&&e31DeGVGTQm@wodK67`swv79|`$IV}M+T%tCK^E>(&HW1%9wr($ r6!`xYvGiX7p#KFI_5ZRNJCa<+ literal 0 HcmV?d00001 From 78578324e8978d9b745d25a5faafdbe2835b7637 Mon Sep 17 00:00:00 2001 From: WeiyuDu <32231561+WeiyuDu@users.noreply.github.com> Date: Sat, 26 Sep 2020 23:17:27 -0400 Subject: [PATCH 08/10] Update README.md --- README.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b70ba08..33b16ac 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,71 @@ CUDA Stream Compaction * [LinkedIn](https://www.linkedin.com/in/weiyu-du/) * Tested on: CETS virtual lab MOR100B-05 Intel(R) Core(TM) i7-6700 CPU @ 3.40GHz +### Plots +1) Plot of time elapsed in (ms) versus array size when n is a power of 2 (x axis: 2^8, 2^12, 2^16, 2^20) + +2) Plot of time elapsed in (ms) versus array size when n is not a power of 2 (x axis: 2^8, 2^12, 2^16, 2^20) + + ### Analysis - - +When the array size is small, we observe that cpu method is better than gpu ones and naive scan is best of the gpu ones. Possible explanations: 1) When array size is small, computation time difference is very small and accessing memory contributes to the largest portion of time. That's why gpu methods are worse than cpu. 2) Work efficient has up-sweep and down-sweep stages. Even though it has the same time complexity as naive method, constants matter with small n. + +However, when array size increases, we observe that cpu performance quickly deteriorates and becomes than work efficient and thrust implementation. Among all the gpu methods, thrust is the fastest, work-efficient scan comes the second and naive scan is the slowest. This is as expected: 1) cpu method has run time complexity of O(n) while gpu methods have O(logn). Therefore, gpu performance is less susceptible to increase in array size. 2) Work efficient scan requires only one array while naive implementation has to access memory of two arrays. Global memory I/O is the bottleneck here, causing naive method (with heavy memory access) to be even worse than cpu. 3) Thrust utilizes shared memory while naive and work-efficient both uses global memory -- accessing shared memory is faster than accessing global memory. + +### Output +Array size is 2^20. +```` + +**************** +** SCAN TESTS ** +**************** + [ 19 36 40 30 35 35 17 8 28 32 41 40 15 ... 44 0 ] +==== cpu scan, power-of-two ==== + elapsed time: 1.7577ms (std::chrono Measured) + [ 0 19 55 95 125 160 195 212 220 248 280 321 361 ... 25698986 25699030 ] +==== cpu scan, non-power-of-two ==== + elapsed time: 1.9503ms (std::chrono Measured) + [ 0 19 55 95 125 160 195 212 220 248 280 321 361 ... 25698890 25698926 ] + passed +==== naive scan, power-of-two ==== + elapsed time: 2.7335ms (CUDA Measured) + passed +==== naive scan, non-power-of-two ==== + elapsed time: 2.73654ms (CUDA Measured) + passed +==== work-efficient scan, power-of-two ==== + elapsed time: 1.32346ms (CUDA Measured) + passed +==== work-efficient scan, non-power-of-two ==== + elapsed time: 1.30934ms (CUDA Measured) + passed +==== thrust scan, power-of-two ==== + elapsed time: 0.405888ms (CUDA Measured) + passed +==== thrust scan, non-power-of-two ==== + elapsed time: 0.328032ms (CUDA Measured) + passed + +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 1 3 0 2 1 1 2 1 0 3 1 2 3 ... 0 0 ] +==== cpu compact without scan, power-of-two ==== + elapsed time: 4.1676ms (std::chrono Measured) + [ 1 3 2 1 1 2 1 3 1 2 3 3 3 ... 1 2 ] + passed +==== cpu compact without scan, non-power-of-two ==== + elapsed time: 2.6659ms (std::chrono Measured) + [ 1 3 2 1 1 2 1 3 1 2 3 3 3 ... 2 1 ] + passed +==== cpu compact with scan ==== + elapsed time: 10.0887ms (std::chrono Measured) + [ 1 3 2 1 1 2 1 3 1 2 3 3 3 ... 1 2 ] + passed +==== work-efficient compact, power-of-two ==== + elapsed time: 2.32755ms (CUDA Measured) + passed +==== work-efficient compact, non-power-of-two ==== + elapsed time: 2.18624ms (CUDA Measured) + passed +```` From c94ee7417c708734d134cd2589a4f046d5252e50 Mon Sep 17 00:00:00 2001 From: WeiyuDu <32231561+WeiyuDu@users.noreply.github.com> Date: Sat, 26 Sep 2020 23:18:55 -0400 Subject: [PATCH 09/10] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 33b16ac..967b123 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ CUDA Stream Compaction ### Analysis When the array size is small, we observe that cpu method is better than gpu ones and naive scan is best of the gpu ones. Possible explanations: 1) When array size is small, computation time difference is very small and accessing memory contributes to the largest portion of time. That's why gpu methods are worse than cpu. 2) Work efficient has up-sweep and down-sweep stages. Even though it has the same time complexity as naive method, constants matter with small n. -However, when array size increases, we observe that cpu performance quickly deteriorates and becomes than work efficient and thrust implementation. Among all the gpu methods, thrust is the fastest, work-efficient scan comes the second and naive scan is the slowest. This is as expected: 1) cpu method has run time complexity of O(n) while gpu methods have O(logn). Therefore, gpu performance is less susceptible to increase in array size. 2) Work efficient scan requires only one array while naive implementation has to access memory of two arrays. Global memory I/O is the bottleneck here, causing naive method (with heavy memory access) to be even worse than cpu. 3) Thrust utilizes shared memory while naive and work-efficient both uses global memory -- accessing shared memory is faster than accessing global memory. +However, when array size increases, we observe that cpu performance quickly deteriorates and becomes worse than work efficient and thrust implementation. Among all the gpu methods, thrust is the fastest, work-efficient scan comes the second and naive scan is the slowest. This is as expected: 1) cpu method has run time complexity of O(n) while gpu methods have O(logn). Therefore, gpu performance is less susceptible to increase in array size. 2) Work efficient scan requires only one array while naive implementation has to access memory of two arrays. Global memory I/O is the bottleneck here, causing naive method (with heavy memory access) to be even worse than cpu. 3) Thrust utilizes shared memory while naive and work-efficient both uses global memory -- accessing shared memory is faster than accessing global memory. ### Output Array size is 2^20. From 08ba87c694963d142bb1337ec21382ffc6a164b6 Mon Sep 17 00:00:00 2001 From: WEIYU DU Date: Sat, 26 Sep 2020 23:25:36 -0400 Subject: [PATCH 10/10] fixing style --- stream_compaction/common.cu | 4 ++-- stream_compaction/cpu.cu | 5 ++--- stream_compaction/efficient.cu | 6 ++++-- stream_compaction/naive.cu | 3 +-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index c87c84d..716f58b 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -23,7 +23,7 @@ namespace StreamCompaction { * which map to 0 will be removed, and elements which map to 1 will be kept. */ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { - // TODO + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (idx >= n) { return; @@ -43,7 +43,7 @@ namespace StreamCompaction { */ __global__ void kernScatter(int n, int *odata, const int *idata, const int *bools, const int *indices) { - // TODO + int idx = (blockIdx.x * blockDim.x) + threadIdx.x; if (idx >= n) { return; diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index 7f06030..2f94be2 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -19,7 +19,7 @@ namespace StreamCompaction { */ void scan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + odata[0] = 0; for (int i = 1; i < n; i++) { odata[i] = odata[i - 1] + idata[i - 1]; @@ -34,7 +34,7 @@ namespace StreamCompaction { */ int compactWithoutScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO + int ctr = 0; for (int i = 0; i < n; i++) { if (idata[i] != 0) { @@ -53,7 +53,6 @@ namespace StreamCompaction { */ int compactWithScan(int n, int *odata, const int *idata) { timer().startCpuTimer(); - // TODO int ctr = 0; int* marker = new int[n]; diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 006b314..3288a91 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -27,8 +27,10 @@ namespace StreamCompaction { return; } - __global__ void kernScan2(int n, int pow_d, int pow_d_1, int* in) { + __global__ void kernScan2(int n, int d, int* in) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; + int pow_d_1 = 1 << (d + 1); + int pow_d = 1 << d; if (k >= n / pow_d_1) { return; } @@ -70,7 +72,7 @@ namespace StreamCompaction { for (int d = ilog2ceil(n) - 1; d >= 0; d--) { num = roundup_n / (1 << (d + 1)); dim3 blockPerGridLoop2((num + blockSize - 1) / blockSize); - kernScan2 << > > (roundup_n, 1 << d, 1 << (d + 1), in); + kernScan2 << > > (roundup_n, d, in); } timer().endGpuTimer(); cudaMemcpy(odata, in, sizeof(int) * n, cudaMemcpyDeviceToHost); diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 38adc70..bbab361 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -12,7 +12,7 @@ namespace StreamCompaction { static PerformanceTimer timer; return timer; } - // TODO: __global__ + __global__ void kernScan(int n, int bar, int *in, int *out) { int k = (blockIdx.x * blockDim.x) + threadIdx.x; if (k >= n) { @@ -46,7 +46,6 @@ namespace StreamCompaction { */ void scan(int n, int *odata, const int *idata) { - // TODO int* in; cudaMalloc((void**)&in, n * sizeof(int)); int* out;