From 7de4ba053f84c373a38a3c12bb6842452dc67de9 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sat, 12 Sep 2015 15:46:13 -0400 Subject: [PATCH 01/14] Add CPU implementations. --- stream_compaction/cpu.cu | 44 ++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu index e600c29..99c7880 100644 --- a/stream_compaction/cpu.cu +++ b/stream_compaction/cpu.cu @@ -8,8 +8,11 @@ namespace CPU { * CPU scan (prefix sum). */ void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); + int total = 0; + for (int i = 0; i < n; i++) { + odata[i] = total; + total += idata[i]; + } } /** @@ -18,8 +21,25 @@ void scan(int n, int *odata, const int *idata) { * @returns the number of elements remaining after compaction. */ int compactWithoutScan(int n, int *odata, const int *idata) { - // TODO - return -1; + int count = 0; + for (int i = 0; i < n; i++) { + if (idata[i] != 0) { + odata[count++] = idata[i]; + } + } + return count; +} + +/** + * CPU scatter algorithm. + * + * @returns the number of elements remaining. + */ +int scatter(int n, int *odata, const int *indices, const int *input) { + for (int i = 0; i < n; i++) { + odata[indices[i]] = input[i]; + } + return indices[n-1]; } /** @@ -28,8 +48,20 @@ int compactWithoutScan(int n, int *odata, const int *idata) { * @returns the number of elements remaining after compaction. */ int compactWithScan(int n, int *odata, const int *idata) { - // TODO - return -1; + int *predicate_data = (int *)malloc(n * sizeof(int)); + for (int i = 0; i < n; i++) { + predicate_data[i] = idata[i] == 0 ? 0 : 1; + } + + int *scan_data = (int *)malloc(n * sizeof(int)); + scan(n, scan_data, predicate_data); + + int count = scatter(n, odata, scan_data, idata); + + free(predicate_data); + free(scan_data); + + return count; } } From e1962098d6d03dbe4b4b59110074a6338ffe3d68 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 00:15:02 -0400 Subject: [PATCH 02/14] Add naive scan. --- stream_compaction/naive.cu | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 3d86b60..875f4e6 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -6,14 +6,43 @@ namespace StreamCompaction { namespace Naive { -// TODO: __global__ +__global__ void kScan(int d, int *odata, const int *idata) { + int k = threadIdx.x; + if (k >= (int)exp2f(d-1)) { + odata[k] = idata[k - (int)exp2f(d-1)] + idata[k]; + } +} /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); +__host__ void scan(int n, int *odata, const int *idata) { + int *A; + int *B; + int array_size = n * sizeof(int); + + cudaMalloc((void**) &A, array_size); + cudaMalloc((void**) &B, array_size); + cudaMemcpy(A, idata, array_size, cudaMemcpyHostToDevice); + + for (int i = 1; i < ilog2ceil(n)+1; i++) { + kScan<<<1, n>>>(i, B, A); + checkCUDAError("scan"); + cudaDeviceSynchronize(); + + cudaMemcpy(A, B, array_size, cudaMemcpyDeviceToDevice); + } + + cudaMemcpy(odata, A, array_size, cudaMemcpyDeviceToHost); + + // shift odata to the right for exclusive scan + for (int i = n-1; i >= 0; i--) { + odata[i+1] = odata[i]; + } + odata[0] = 0; + + cudaFree(A); + cudaFree(B); } } From df438b62521dd762a75923ed938487721c462f50 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 15:34:32 -0400 Subject: [PATCH 03/14] Efficient scan. --- stream_compaction/efficient.cu | 45 +++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index b2f739b..21cf6c3 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -6,14 +6,53 @@ namespace StreamCompaction { namespace Efficient { -// TODO: __global__ +__global__ void kUpSweep(int d, int *data) { + int k = threadIdx.x; + int exp_d = (int)exp2f(d); + int exp_d1 = (int)exp2f(d+1); + if (k % exp_d1 == 0) { + data[k + exp_d1 - 1] += data[k + exp_d - 1]; + } +} + +__global__ void zeroLastElt(int n, int *odata) { + odata[n-1] = 0; +} + +__global__ void kDownSweep(int d, int *data) { + int k = threadIdx.x; + if (k % (int)exp2f(d+1) == 0) { + int left = k + (int)exp2f(d) - 1; + int right = k + (int)exp2f(d+1) - 1; + int t = data[left]; + data[left] = data[right]; + data[right] += t; + } +} /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - // TODO - printf("TODO\n"); + int *A; + int array_size = n * sizeof(int); + + cudaMalloc((void**) &A, array_size); + cudaMemcpy(A, idata, array_size, cudaMemcpyHostToDevice); + + for (int d = 0; d < ilog2ceil(n)-1; d++) { + kUpSweep<<<1, n>>>(d, A); + } + + zeroLastElt<<<1, 1>>>(n, A); + + for (int d = ilog2ceil(n)-1; d >= 0; d--) { + kDownSweep<<<1, n>>>(d, A); + } + + cudaMemcpy(odata, A, array_size, cudaMemcpyDeviceToHost); + + cudaFree(A); } /** From dfd2b37037c08d330e0da23636a69812a15d7c29 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 16:15:01 -0400 Subject: [PATCH 04/14] Fix work efficient scan for non-power-of-two arrays --- stream_compaction/efficient.cu | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 21cf6c3..929fa2d 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -33,7 +33,27 @@ __global__ void kDownSweep(int d, int *data) { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int n, int *odata, const int *idata) { +void scan(int size, int *odata, const int *input) { + int *idata; + int n; + + if (size & (size-1) != 0) { // if size is not a power of 2 + n = (int)exp2f(ilog2ceil(size)); + idata = (int*)malloc(n * sizeof(int)); + memcpy(idata, input, n * sizeof(int)); + for (int j = 0; j < n; j++) { + if (j < size) { + idata[j] = input[j]; + } else { + idata[j] = 0; + } + } + } else { + n = size; + idata = (int*)malloc(n * sizeof(int)); + memcpy(idata, input, n * sizeof(int)); + } + int *A; int array_size = n * sizeof(int); @@ -50,7 +70,7 @@ void scan(int n, int *odata, const int *idata) { kDownSweep<<<1, n>>>(d, A); } - cudaMemcpy(odata, A, array_size, cudaMemcpyDeviceToHost); + cudaMemcpy(odata, A, size * sizeof(int), cudaMemcpyDeviceToHost); cudaFree(A); } From bc60292d1bdfadd0d3b455bb1e0a88b2bc7e39f9 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 17:40:34 -0400 Subject: [PATCH 05/14] Fix naive and "ping pong" instead of using memcpy. --- stream_compaction/efficient.cu | 49 +++++++++++++++++----------------- stream_compaction/naive.cu | 14 ++++++---- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 929fa2d..ed47710 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -15,10 +15,6 @@ __global__ void kUpSweep(int d, int *data) { } } -__global__ void zeroLastElt(int n, int *odata) { - odata[n-1] = 0; -} - __global__ void kDownSweep(int d, int *data) { int k = threadIdx.x; if (k % (int)exp2f(d+1) == 0) { @@ -30,6 +26,22 @@ __global__ void kDownSweep(int d, int *data) { } } +/* + * In-place scan on `dev_idata`, which must be a device memory pointer. + */ +void dv_scan(int n, int *dev_idata) { + for (int d = 0; d < ilog2ceil(n)-1; d++) { + kUpSweep<<<1, n>>>(d, dev_idata); + } + + int z = 0; + cudaMemcpy(&dev_idata[n-1], &z, sizeof(int), cudaMemcpyHostToDevice); + + for (int d = ilog2ceil(n)-1; d >= 0; d--) { + kDownSweep<<<1, n>>>(d, dev_idata); + } +} + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ @@ -41,12 +53,8 @@ void scan(int size, int *odata, const int *input) { n = (int)exp2f(ilog2ceil(size)); idata = (int*)malloc(n * sizeof(int)); memcpy(idata, input, n * sizeof(int)); - for (int j = 0; j < n; j++) { - if (j < size) { - idata[j] = input[j]; - } else { - idata[j] = 0; - } + for (int j = size; j < n; j++) { + idata[j] = 0; } } else { n = size; @@ -54,25 +62,16 @@ void scan(int size, int *odata, const int *input) { memcpy(idata, input, n * sizeof(int)); } - int *A; + int *dv_idata; int array_size = n * sizeof(int); - cudaMalloc((void**) &A, array_size); - cudaMemcpy(A, idata, array_size, cudaMemcpyHostToDevice); - - for (int d = 0; d < ilog2ceil(n)-1; d++) { - kUpSweep<<<1, n>>>(d, A); - } - - zeroLastElt<<<1, 1>>>(n, A); - - for (int d = ilog2ceil(n)-1; d >= 0; d--) { - kDownSweep<<<1, n>>>(d, A); - } + cudaMalloc((void**) &dv_idata, array_size); + cudaMemcpy(dv_idata, idata, array_size, cudaMemcpyHostToDevice); - cudaMemcpy(odata, A, size * sizeof(int), cudaMemcpyDeviceToHost); + dv_scan(n, dv_idata); - cudaFree(A); + cudaMemcpy(odata, dv_idata, array_size, cudaMemcpyDeviceToHost); + cudaFree(dv_idata); } /** diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index 875f4e6..a447c5f 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -10,6 +10,8 @@ __global__ void kScan(int d, int *odata, const int *idata) { int k = threadIdx.x; if (k >= (int)exp2f(d-1)) { odata[k] = idata[k - (int)exp2f(d-1)] + idata[k]; + } else { + odata[k] = idata[k]; } } @@ -25,15 +27,17 @@ __host__ void scan(int n, int *odata, const int *idata) { cudaMalloc((void**) &B, array_size); cudaMemcpy(A, idata, array_size, cudaMemcpyHostToDevice); - for (int i = 1; i < ilog2ceil(n)+1; i++) { - kScan<<<1, n>>>(i, B, A); + int *in; + int *out; + for (int d = 1; d < ilog2ceil(n)+1; d++) { + in = (d % 2 == 1) ? A : B; + out = (d % 2 == 1) ? B : A; + kScan<<<1, n>>>(d, out, in); checkCUDAError("scan"); cudaDeviceSynchronize(); - - cudaMemcpy(A, B, array_size, cudaMemcpyDeviceToDevice); } - cudaMemcpy(odata, A, array_size, cudaMemcpyDeviceToHost); + cudaMemcpy(odata, out, array_size, cudaMemcpyDeviceToHost); // shift odata to the right for exclusive scan for (int i = n-1; i >= 0; i--) { From 2db4fe1a6a516d4988c6b6c7a9ed968a10f3fd61 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 17:43:51 -0400 Subject: [PATCH 06/14] Add thrust scan. --- stream_compaction/thrust.cu | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index d8dbb32..ab15899 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -13,9 +13,7 @@ namespace Thrust { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - // TODO use `thrust::exclusive_scan` - // example: for device_vectors dv_in and dv_out: - // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(idata, idata + n, odata); } } From 38f9e26964420454ba4f4d59896595a4f810b4f8 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 19:13:35 -0400 Subject: [PATCH 07/14] Efficient stream compaction. --- stream_compaction/common.cu | 5 ++- stream_compaction/efficient.cu | 62 ++++++++++++++++++++++++++++++++-- 2 files changed, 63 insertions(+), 4 deletions(-) diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index fe872d4..6b36c0c 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -23,7 +23,10 @@ namespace Common { * which map to 0 will be removed, and elements which map to 1 will be kept. */ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { - // TODO + int k = threadIdx.x; + if (k >= n) { return; } + + bools[k] = (idata[k] != 0) ? 1 : 0; } /** diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index ed47710..74b1622 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -74,6 +74,18 @@ void scan(int size, int *odata, const int *input) { cudaFree(dv_idata); } +__global__ void kScatter(int n, int *odata, int *indices, int *idata) { + int k = threadIdx.x; + if (k >= n) { return; } + if (k == n-1) { + // always take the last element + // `compact` will adjust size appropriately + odata[indices[k]] = idata[k]; + } else if (indices[k] != indices[k+1]) { + odata[indices[k]] = idata[k]; + } +} + /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. @@ -83,9 +95,53 @@ void scan(int size, int *odata, const int *input) { * @param idata The array of elements to compact. * @returns The number of elements remaining after compaction. */ -int compact(int n, int *odata, const int *idata) { - // TODO - return -1; +int compact(int size, int *odata, const int *input) { + int *idata; + int n; + + if (size & (size-1) != 0) { // if size is not a power of 2 + n = (int)exp2f(ilog2ceil(size)); + idata = (int*)malloc(n * sizeof(int)); + memcpy(idata, input, n * sizeof(int)); + for (int j = size; j < n; j++) { + idata[j] = 0; + } + } else { + n = size; + idata = (int*)malloc(n * sizeof(int)); + memcpy(idata, input, n * sizeof(int)); + } + + int *dev_indices; + int *dev_odata; + int *dev_idata; + int array_size = n * sizeof(int); + + cudaMalloc((void**) &dev_indices, array_size); + cudaMalloc((void**) &dev_odata, array_size); + + cudaMalloc((void**) &dev_idata, array_size); + cudaMemcpy(dev_idata, idata, array_size, cudaMemcpyHostToDevice); + + StreamCompaction::Common::kernMapToBoolean<<<1, n>>>(n, dev_indices, dev_idata); + + int last; + cudaMemcpy(&last, dev_indices + n-1, sizeof(int), cudaMemcpyDeviceToHost); + + dv_scan(n, dev_indices); + int streamSize; + cudaMemcpy(&streamSize, dev_indices + n-1, sizeof(int), cudaMemcpyDeviceToHost); + + kScatter<<<1, n>>>(n, dev_odata, dev_indices, dev_idata); + cudaMemcpy(odata, dev_odata, array_size, cudaMemcpyDeviceToHost); + + // The kernel always copies the last elt. + // Adjust the size to include it if desired. + if (last == 1) { + streamSize++; + } + + return streamSize; } } From 64c5cdfbe23f11716d3481d71ca43616870a7b5a Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 19:28:16 -0400 Subject: [PATCH 08/14] Use device_vector to remove mem access from exclusive_scan. --- stream_compaction/thrust.cu | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index ab15899..10c3cfb 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -13,7 +13,10 @@ namespace Thrust { * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ void scan(int n, int *odata, const int *idata) { - thrust::exclusive_scan(idata, idata + n, odata); + thrust::device_vector ivec(idata, idata+n); + thrust::device_vector ovec(odata, odata+n); + thrust::exclusive_scan(ivec.begin(), ivec.end(), ovec.begin()); + thrust::copy(ovec.begin(), ovec.end(), odata); } } From a1cf831946cb31a8bc582e3a50b826138086d0d0 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Sun, 13 Sep 2015 23:42:07 -0400 Subject: [PATCH 09/14] Move scatter to common. --- stream_compaction/common.cu | 17 ++++++++++------- stream_compaction/common.h | 3 +-- stream_compaction/efficient.cu | 14 +------------- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu index 6b36c0c..cf9b41f 100644 --- a/stream_compaction/common.cu +++ b/stream_compaction/common.cu @@ -29,13 +29,16 @@ __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { bools[k] = (idata[k] != 0) ? 1 : 0; } -/** - * Performs scatter on an array. That is, for each element in idata, - * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. - */ -__global__ void kernScatter(int n, int *odata, - const int *idata, const int *bools, const int *indices) { - // TODO +__global__ void kernScatter(int n, int *odata, int *indices, int *idata) { + int k = threadIdx.x; + if (k >= n) { return; } + if (k == n-1) { + // always take the last element + // `compact` will adjust size appropriately + odata[indices[k]] = idata[k]; + } else if (indices[k] != indices[k+1]) { + odata[indices[k]] = idata[k]; + } } } diff --git a/stream_compaction/common.h b/stream_compaction/common.h index 4f52663..73fcccd 100644 --- a/stream_compaction/common.h +++ b/stream_compaction/common.h @@ -29,7 +29,6 @@ namespace StreamCompaction { namespace Common { __global__ void kernMapToBoolean(int n, int *bools, const int *idata); - __global__ void kernScatter(int n, int *odata, - const int *idata, const int *bools, const int *indices); + __global__ void kernScatter(int n, int *odata, int *indices, int *idata); } } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 74b1622..41e9125 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -74,18 +74,6 @@ void scan(int size, int *odata, const int *input) { cudaFree(dv_idata); } -__global__ void kScatter(int n, int *odata, int *indices, int *idata) { - int k = threadIdx.x; - if (k >= n) { return; } - if (k == n-1) { - // always take the last element - // `compact` will adjust size appropriately - odata[indices[k]] = idata[k]; - } else if (indices[k] != indices[k+1]) { - odata[indices[k]] = idata[k]; - } -} - /** * Performs stream compaction on idata, storing the result into odata. * All zeroes are discarded. @@ -132,7 +120,7 @@ int compact(int size, int *odata, const int *input) { int streamSize; cudaMemcpy(&streamSize, dev_indices + n-1, sizeof(int), cudaMemcpyDeviceToHost); - kScatter<<<1, n>>>(n, dev_odata, dev_indices, dev_idata); + StreamCompaction::Common::kernScatter<<<1, n>>>(n, dev_odata, dev_indices, dev_idata); cudaMemcpy(odata, dev_odata, array_size, cudaMemcpyDeviceToHost); // The kernel always copies the last elt. From 98e43c3e600ff9fec21b599485da767d9704b702 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Mon, 14 Sep 2015 05:09:43 -0400 Subject: [PATCH 10/14] Add performance data. --- data/cpu_by_arr_size.csv | 31 +++++++++++++++++++++++++++++++ data/gpu_by_array_size.csv | 17 +++++++++++++++++ data/gpu_by_block_size.csv | 10 ++++++++++ 3 files changed, 58 insertions(+) create mode 100644 data/cpu_by_arr_size.csv create mode 100644 data/gpu_by_array_size.csv create mode 100644 data/gpu_by_block_size.csv diff --git a/data/cpu_by_arr_size.csv b/data/cpu_by_arr_size.csv new file mode 100644 index 0000000..5a5070b --- /dev/null +++ b/data/cpu_by_arr_size.csv @@ -0,0 +1,31 @@ +size, scan, compactWithoutScan, compactWithScan +0, 0.018390, 0.017990, 0.062040 +1, 0.036900, 0.036660, 0.125410 +2, 0.056000, 0.056050, 0.191390 +3, 0.075810, 0.077990, 0.260930 +4, 0.098610, 0.103510, 0.342300 +5, 0.130480, 0.136290, 0.460860 +6, 0.176600, 0.193570, 0.614170 +7, 0.249980, 0.288220, 0.837180 +8, 0.379200, 0.451990, 1.180500 +9, 0.595360, 0.787670, 1.739130 +10, 1.010410, 1.459940, 2.873860 +11, 1.808420, 2.799750, 4.983760 +12, 3.382930, 5.454100, 9.189020 +13, 6.617830, 10.682090, 17.652109 +14, 13.106260, 20.930850, 34.538570 +15, 26.239680, 41.440238, 95.065141 +16, 53.197820, 82.259148, 232.273063 +17, 106.942953, 163.591766, 524.566375 +18, 214.327141, 326.009719, 1154.182750 +19, 444.831375, 655.437125, 2664.925000 +block size, naive::scan, efficient::scan, efficient::compact, thrust::scan +2, 1.653706, 3.275751, 3.377140, 0.149708 +3, 0.868973, 1.679395, 1.735914, 0.154134 +4, 0.474548, 0.891957, 0.926928, 0.155243 +5, 0.268520, 0.502705, 0.531410, 0.158484 +6, 0.156136, 0.404647, 0.434957, 0.164572 +7, 0.144736, 0.393489, 0.413897, 0.166069 +8, 0.145956, 0.393555, 0.414129, 0.170372 +9, 0.147912, 0.393687, 0.411637, 0.174212 +10, 0.163202, 0.407150, 0.423524, 0.180073 diff --git a/data/gpu_by_array_size.csv b/data/gpu_by_array_size.csv new file mode 100644 index 0000000..a9dd910 --- /dev/null +++ b/data/gpu_by_array_size.csv @@ -0,0 +1,17 @@ +block size, naive::scan, efficient::scan, efficient::compact, thrust::scan +4, 0.019901, 0.032131, 0.042268, 0.014861 +5, 0.022966, 0.039091, 0.048816, 0.013974 +6, 0.026545, 0.046097, 0.055637, 0.013844 +7, 0.029734, 0.052625, 0.062891, 0.014107 +8, 0.032616, 0.059878, 0.069612, 0.013777 +9, 0.035588, 0.066927, 0.077829, 0.014007 +10, 0.039123, 0.074742, 0.085684, 0.013909 +11, 0.042336, 0.084190, 0.094274, 0.017210 +12, 0.049155, 0.093326, 0.103361, 0.018924 +13, 0.051340, 0.112999, 0.124146, 0.025679 +14, 0.066044, 0.153436, 0.166579, 0.040478 +15, 0.092430, 0.233036, 0.248662, 0.040362 +16, 0.145964, 0.397543, 0.418071, 0.049354 +17, 0.249689, 0.733529, 0.763885, 0.072880 +18, 0.521292, 1.435356, 1.482930, 0.095224 +19, 1.806152, 3.125029, 3.233285, 0.178910 diff --git a/data/gpu_by_block_size.csv b/data/gpu_by_block_size.csv new file mode 100644 index 0000000..3bf25a8 --- /dev/null +++ b/data/gpu_by_block_size.csv @@ -0,0 +1,10 @@ +block size, naive::scan, efficient::scan, efficient::compact, thrust::scan +2, 1.705963, 3.371424, 3.477096, 0.152979 +3, 0.869121, 1.678385, 1.735070, 0.153120 +4, 0.475849, 0.892212, 0.926461, 0.156361 +5, 0.268811, 0.502830, 0.531393, 0.159694 +6, 0.156100, 0.404411, 0.432535, 0.163947 +7, 0.144814, 0.393823, 0.414350, 0.167775 +8, 0.146012, 0.393572, 0.414207, 0.172095 +9, 0.148004, 0.393960, 0.411924, 0.175363 +10, 0.163155, 0.406132, 0.423445, 0.179432 From 263f4ef3f09b6408a0ff03469517629b09ead3b8 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Mon, 14 Sep 2015 05:36:33 -0400 Subject: [PATCH 11/14] Add charts. --- data/compact_by_array_size.png | Bin 0 -> 19727 bytes data/cpu_by_arr_size.csv | 10 ---------- data/gpu_by_block_size.png | Bin 0 -> 19744 bytes data/scan_perf_zoomed_in.png | Bin 0 -> 22583 bytes data/scan_perf_zoomed_out.png | Bin 0 -> 18702 bytes 5 files changed, 10 deletions(-) create mode 100644 data/compact_by_array_size.png create mode 100644 data/gpu_by_block_size.png create mode 100644 data/scan_perf_zoomed_in.png create mode 100644 data/scan_perf_zoomed_out.png diff --git a/data/compact_by_array_size.png b/data/compact_by_array_size.png new file mode 100644 index 0000000000000000000000000000000000000000..d426344a88a019b2db7d941580f1756cb1585d36 GIT binary patch literal 19727 zcmc({cRZH=-#&gK6hdZ5q_VPS_GL@4p%iffc6(VG(T(U>9v-kFW zUhnsPzdxV*e%$x>uixYM^>{?r#d)3UIgaOX9IrE2OGEiA5j_!tAZKr=DCi&vZaISB z1fIr)cNUtKU&4QPZs zE1YYl|4i{r7LUl4%W82>pXxHpEhIqu^*M?MvJ{H%-#vOmU&C_RisGDBgVm{LcyG=U zKWB|bR(K^=lGH3;MHZHR+X+4DOiW4=D=kW#O%#)Oq-A8pi9MDHJ}ozV_=)<>%mhCW zrga?nzIy5Z=?mY_Z|DSPCK>`XQ>2AOMGf6dgcSRWOdQ5O#igVaFNYvjJX3f*)XmM! z!yoO2Yur|%t{JYcuXlEK+S=M?X(WleE)H<&=jGJ64n3UfN#*C~PZD>je!7-c{b!wb zWxRg;llw-U*FljAP$# z^hH`yP*B9aMCC6OarkD*OC!J1MILOCsxy5}4lG+q1W~{~F4aBI9!$ z&3y-Rr!Az)`^dwE;s8BuZEY%2kDcYCjc+mx z=MPWDD>84!bSH>9B%SBZuLwkjaQBes4kyP)H*Va}(9k${?%c?_QMt|1a7D3l=*6tMZjUvP14ef=)Kt7PoCsKb~R$^LwH${hQMxaWEso8A&3*~OmJ!8f_dGcz+u5^mCN z%ZZrUqn-MbrJ?dazBqVu(-1^i%ay6AmkzjeKg@nhIXF0A5_g{K)s)F3xm2!oJa34$ zuW%U8`g$XR^}{b;*^`&pO~+~_O52lsP%Bfz6%JJly)u)pXnA8T_A9dbm6VlryZ-bS z%UJcG;Y;ZaeVa!L&M7>&&?{e=d@_`6Qm>}u859n!)Sq-0Sz21^Q0f>PFV4>P!`)*L zwJ*3`$|@qVeYm&5B^NW($dF2GH*swo7eRt{=S|_V@!O! z>XP%ySglOu^knm^T-uvweIp_$b>#|U0s`<{#yN<3@{*G;mS1gYY3bBD<3q>wbs2={CBBPhHS2 zK0Nr^4RJTyoirj6yRE5AC*%DIV`4X29W{nSB*n58b9ruB=5Q5Hl!?#FtzF;R+IkW7 z5+k>leb2A@%|)r9v9V+U-Me=$;>YlqRL}kS)8X@Qz(jiy-KjjR>usiri-*S~<)N?_ zJ{st%!m*m*P@2E*%cb+5H_dP@Sn%ivp8L4rFAkQ@ADOB`HLl+X@OCrp zJ;w@jBae%1a$?zU*_ zuLiboeXz+RIyG_AFFj?}y)@iL&ikA5a6kYHx)Q}AL{Cy4|7v`ZZ&<9O{OL}G{m427 zqi5qrNkJjKH%0j|toFi}X70m@`Ivlrr}Omm0vqRJtKNk3$~e#U1XHr@@2q^Qjr@H0 zs#kXYQ@hJ`#1sYXIDyX;*98^e?+>+*bx`E#)2CXvFJHd28mZ*pVuZl#9-e4Pr zZ;-7PCwKI8A*tSDDkVWwho8F$nKpOV9zbO%st#_V zB*`sKiHdSL+3~AF(3kJFvuv`Ap+rXgAWCK@ zCk39aPQ=EZw=7{8mee|l63@cSXxrzQ)QCh?R#)4P*BxPUbvs8;a-%;10JPOl9Lbhx zZXe)7Wy0w79&c@J<>IGN49V0?H@~9YSsG@W?7V+o%xMbXSWbY14Te2y3c3%;*|Sp6 zIH1ACR(>HNXjNwG{&`7CDya}p*UZ#Z_M6U5eDw+>PiM1tGBh{c(Q4=UMq|whwjG4 zBha`5N$5%*d?mFkMWdNR&H?%$K7HD%FR!mFK@Term<04WUxO1Nm(l_Yx8{b_;1C0HwXGn#9#^dT8Oa|kmg z4RE3~2S+Y+yG<^K0{}WjcPieFiQl|=vmM7)>}mi+htF1@p|h5_cynZZNMI%z zZ{s3U3WWQGCnBB-1u~K#$3gW-#fI~4o#zd1Wq{+^>Iq7PtTlAP6qsnOASb{>7_^90 zk1xI6!fmIOaYGzzM_XD2$eH~yPdz~dlPFkNqcT#sv$mERvNTc^u^?Y~&ZS8+Nqp;f zsEkfC`z=0wA`+4e@`#ui<$P~%Z>oE-H{MZES-JfAS*Z8WrYvIyZ{d}$$8`u@0FN|0 z#wvTh3-1B?ek3Ic;obVvpZ#!f#b%w4mUe0i&&S6{ytiaF;P1;~Gxo{d$cR3P@hZuX zyR0luCON0^`}++eRZkLxtQ&`q8v+QM%|6-mCdi-6on;Wo=#l3z1vm^gx5oldp>F z_Eksw_Gs0M7-SlA|hUegg}x&1*KqM=osb_6&1a5#T=8P-Dv~!o&H^2IA-oFi@k|ZndA3mWh7)| z*~&&HWmeyxpP?M9^Lk`%-ZaX^%lmC;s2aeiO&%+pqHAx7CrbSo!0Uj%6u+03*Rsgf zt5@Ngv$C>s7Bj+Mpsa37-v>-}x5kzG`t^Y?U%vG8gdOMsP?NHU3Iu~vnXQ8Z6Ag`} zon2v8Jj~lxOog3<#dm9MdeAuBuNg#ngoGk@7NLnRE-o(g7c((2-P6;Xnw}OmZw)Ie z6T15vd&zH?2{6UR$B)5a@as!ZM-1-@OdE1?{(cQIY(HQyh3In?77+OH^QQ<0N89)B z@3>!wgy>kjz!4fLnoV9TNw}?mzdiAzGr{C-yK%9v_&j6?t&1TJN@Bw=Ir;82ehG;h zm`|F&ekHpodFy8j45sL>1qEF8sZ%~OswBHIXT%@%lbuWj5FIO5?rTba_Q`BVt>6ni zkDd;wtgM7ebFa#&d0^ny6;h(b%hv=2H+I*416=AvHv+m-Jh^!WPC|>uD4GoFdw5EU zjNMQfpy}^6_wS?Q;%nn$(q1*Q32_!HR^z7ztnhTP~t0;Lym~VFxA|vAn#F4&DgT$kOQ<78h4_zrkPdVYgnc zPV?ZPn2-==WW;zh3YT1HD7$5tM&D`kB4BbAWa2AG1$}!(Sl&Wq*Qv z)@3ujb?(DqG*!fPVPVc2L0RED4E_ zpx~du((LL#K&_#L;i{^stN)&y#Kpx05{8@N>x&3B&7Vak2LZgN;F<;^8#wblsq$)S zud5&7po1&xKOqS7E4~dJBvLW64qo^N{ww&VgLrv;d!SgO_I|+u%0XPB?e1uW7)_sGuW(ekQsDAk$;}LqC2r{ZO5tOnqR`;YGU0nM5`%eHG z0_~Iq{qON)6U6xH$F{DnvuDoG3ENPW!q}r9W%!@11Ih&civWO^K+1hz09r}u^?B`E zx2$Y!JO9Tv(1Vr@$jG?j+6O_YsaafE8C#*j3=e`nR=TL-JkEB;OHiqlGo*5e(zoOxOswoT;H9%^UH?Lopm6xllsyaD4n-iy} zrCr(KC?`jq=iFO!wa$8PV$DI@?SwfTWl8d+L}go9NUv^hOT(-K)mvtNE}hK2^VG|~ zYW}NN#14Y{5p1_lqiZ8Y_fX0w+0Tzlb1-fw(ciRI%up2JSjTGI9CUPap!a6lLsfBy z0Yp<%6EC3t+cLn(w3EzJNDdeFzMh4u54l0hdl#s!hvw#+6AgG-JtHHZ;D!_x72)IK zudS_75kqD`%KutfNr{M%mu(mutGgM)vrs4oO_t_+iKpZ8NEJYEZi)xQP__w3FSHgJ z85v)A}A5;Ip{<9Yl(>&_vY>)w` zX?S=L)C=tYh}uL_Jhm1%uakH5_37WccQTve^Ru@%LE3A- zDVXwGqF8!65pvp(2Qa~uLXUfUJ0VP+2_kkOAtBX1C&v&dFzCmNI$#6soAf~epmtSp z_TMlwQjs<`HV&0PQqG52g^u4$!PGSM!-o$#p~UFBv7TtI*u>M~H`eyh0D}1VRu5L{ zSNe-nLPNDZU^Ev7XQJ6-@Cg{Eh7y%7jCtWqu#XvlF3Zoqwj?7hefLUobvMsVDfTECAmyTg#>weg7%ZIy zsw+Wg?^$FKbc-@iP#$k>jh7b7|aQcAW=Gp4SD7;{)!95437Qb~zG_{6}#0K=!5c>RNRx)NNb z==x!hv3hhd8oG<`KYRe9x*NF8mC>4*$Vi0-e~;94!r9D=&aGjWU}Ccxcz>yUpMZd% zH(S%q)wS;NZvw^`1H*z_vP)9JC$h4#($dm!n@2x92#Sk~OGp@oX^TBwYgqFcaAsS} zqct4GJtDwT`E1TBdY~ zoJ2rOS`2$YS1sJ_T>#?$rq6b7Z}0TC zOX#KfZ^9NYfh`JYN*Bg5>9cIn78O*CIir~`_#d<^j#O=c_@tzyG;#!d<}Ih&KV^AM zsudW$7N-024K=US$pOE10EG1B;$VaUhxm1I@gI#qS;@=E<)mv?J(hK|wY?euH_ z#L4&cM8CUxH5R-~f^LJRbs8rPdF|qJaJ%Q<)vki>_#_J?hQ*9$HovQavibmDyRue5( zq1Wr5ev^nUkk^fgz0J&_0*RWf9zWd0*H#}y&SWhHAaNhM^y>}AbD(~g2iX*7hcW&KccdVe6 zU>B6biXkv@-`cECeYNf#IsrplC$6Bcowaow=rwQd1I;b?@m+X0OFcckT=3p~P)act z#k$#~8#JAXV!flIChkFaJ?fQqY5u>u+o}6Nv4xK~*xz3UbPdcu=wu@w9uStlARt($ zkB8)h0{wG*ydEMNBYCgV(ey{gjbFchLD(0r+CReN$f`ecb^Vx|n~PcDZ55w`F%{^r zi|5a)1fjiIxd#RZqeNqD$Lo^zzd$BJX|wr}ai;bY%(&2oV6+|^JFZ!&>wtF9)s-nH z);EzXSM|0C)qZsJ0%TxmpX0G_RfaIK3(}+|Zy}WG{6z}NZ;4t%O z34=fW-ZbFQ@$8ktSKV`DU$M`Xpr@j8ULFy%dEDWm#W`N# zkP;I^TfPk9)!Vmk)!wtyl97c1QDIW$^kTmSl(ZlH{n@urTc84!LGRm&fI)r^ct>Cl z4|Y~M@|13C$PPxvlCg_x?|PgZOI46xK0%_gM*uA(o~Bl0G_yH(wPVmA+JsM3^kD)X zC1pgH*+`6|Xju6j`+&)*siq=to))f~N5}pJ0TikPAU?-P{shK!z+z#}iA>7qHCD@V zadHw^nI3=jA%Ahi{We@2|7v!Arh;1^%%Bx`?QfdaQDFTLLvW*g&A!L-KWG=IJcUpP z0Qf>*QMLU9xeKx+usDD!J7_o$z9!+INBH)9U$g~loq> z*fVxWQQUlcC`A}8of9Bpv0UcinsTQMqD27{Gcv7hCY@tgJO(}e94YL)>@CHHEY$Ht zu_uy}l6)r!>In=fh)Q~sC!a+-mB&bl1#t!W1{uPM2N)0~4A%^%5`(yV-3THEJ6gle z{G`ylCnhGY-i+??kBb{5tibUt`QjjuOIr7d*$NoAijtC{dLNlPPLoZ+fD{<7JKhH- z$~F~hE>Owy9K^)00DBD$4?l5BD=zkd!4(88_K)_UfdF=@251Ai@gGcXZ`G5BHau}m zu?GCGM<99)4f}!Y5Rmceig@?#15EOhlV&fNXi;wLwyQ`487ihTL9`fJ^4+_4p^sm> zbO}T8Az%dT4-hv{T+BCQfxic?8=43ZfWLqL9xi|Mbu>W0MzI4*^&!YlqP7EYW~eQ9 z=!$xJREm^-QvsKi-*SN}4@m@Nj7jpTBR~|mMG1h^tzRDF%$Xz$J|s=5Zww`7wpC@4 zgvw7|4d|U;=BK3XzUSNGamziMY~Wis9jc zV^-zrutR-0D1yHT@p~9!-4XhHH2U+mZ=oK+?(Xhbp9N@npuy!F&i7`QC5DBelAIQZ zK(DF05mCRvvvtvW55{kdB#_$A_SUCOYFsm8K%04U_aYtLVBsCnq3qz`HfUDBi}(Kc zk%OPQEG&re`4@1Y94o#jGXygtK}nu0FZek;RaM(TL`q9fhYF<@swNFm#a-y|OEyp! zKXx=t5`VrhyQV`JvwI(9-q5!4v z+lx{Q3kya%I*s|%__*Rb0KD`;!d`SJ#_XiROlkmZmYM_Y)l1^z@89!f1zKbj>;*># z{tWr<1Uw4#^eAU$#Kbp_P?Zgal)NgVD{hI_Go5jm9$;$PK@_HMg@YEc!C3tpqVW7L zh{A={I%+=xFL`60A`*$yRB0%1a1!AeHiu=}kUwV-lR&h87DAS_CPGwyHY_ZaG z7tvA(?y2$uU{4{OHg|Tik6QDTbaW_$Hvs>eDy>5eQ$2GZP*s^Hpi8KxI&2>yh{`&r z@Ql#$PrkjaAjHbFKn(uac2>~T{05<-n~I5u+IqXTO$Rmu#(B6w-eCKPGyxeHAdN}a;bFH9Q zdO;&b8Sg_Z?nu(a;toY(OeFLvz#VbolZHjcrpbli>3BgvMB|j#APA`e#Xz?kV5Y6@ zA`GfIVHd6AoUn40jm-k+8&fIk88;%T-Bx%gzL}4}bq3OxgGNQ_HtQ2l^5)Cp9PJlC z#5&p@cF5PySDJ-YEfLz?z{=^u#mWS#=jVqw2y{LGy@Hm&pT z7MTB-cf^v#3Z6ZCHuD8Y!%PycT-UGfMrcY`xo>18ZiMekuiCUaW9Dze5bLM)a{|ZL zNsF@i+B=!xP`88&x81r-R|#mrS#SL@Kq00}%KY14n1g=I=FAjoJF&;V#A z?JX^V+k^Rryqt^Q(+QV2q4KQbxJ*|)IMA8@^Jhb~{rZCsT=ymg1qBoMA1GyOBw-op z@bE?|)#RANom2t0zc^OX(}b%y3l~qBRqxiH9M`;ffnSkGcL`eo!MUSG44~`WV~+ug zrn}cc4#$xKDgrpjI-e71W@g`c@>1@c0vi>xy5=6BX+m&2xd?64t@mCJG`=A2(3cM^ zh_E0$AKMkXUUHx1rJo;;AO*Ag-IB4Oc3lr?bObOdYfy{kPp#fjN2b%7tb<{MzY6I} zj|QvCgRgJ5c6N#li!TaC(DE3^3R&wseY*P(jH!g*hlf)by)P|&z!MTP3RJj>u`!sX za(qTIZcuuNxisXCr$k1YJ2;$-x{X_yn@`Qm=%gmvKzjBqM0)w^>v&Vw__nJ#vUO9(HucBhfE=qgOEiA-GM;nw{cEK=@HR35K z@YAwwEf0fY<7ZZB^ly?J(eTCuPO2!r(W z2LKw}cNTyB;s;p)+87+#47Qrz(s?qsv?S%RX|>>9T~h;%s|WmQ9t+?=h|zxowSWL0 zA7-q#^oB-uaQ>jD!pzvIreUr{tgdg@U&Od^R_)^0#f)&55*SZcL9`64hl&g3Dm7lP z@VZ+HG-j&-?EPG?Ym-#%By?8Y{0WAoTTspyfIX>m z{M87}-@yADWOOpv38ZFbXK#*v{`oT+m%c{T@2xaWlnjqA=P!vjXcut2h`d^B&rSM0 z=J9fCvYFU5uDXAOX_bMyrRbyms5n%Xo%NOG0sWaD0f{!AJT2UNOBLf!hlL!*5_cs0 zF=?e&N2DA@K}Rt&v`AVc=Xcd#kj^Fcn)r;<*559}dwV6IfIj*iKq4qPKxhnJBDML> zA+GOT0d)7;))rRyoW{F7jE$004;&&mwv;H5c#>WU98_(j#EEa5Rr}ME<660)dAJsA zaF$oEHn+6253&BGq{aWRnkFM;M!}GPFr_tl_5hsm@j0QsX7I9UeEL?kv$Z>0h676JwF)2C1DSs9s`k3%m~Q(xVa%YA`xe*9C#R#TAiE#qdXPxRV{S?^$O zn`8TT-Sw9clFacFUmZD!VTF?##w?S%?0>sM6U0ilG(CYR8&;vh7Ry6m^1Ug;<#+Rb3IeLNHAks&g}sS*bbkILNKH>qNk)EVW~_1L`1n{% zOpJc|L8x2J!8g=nh6Mv;*=F4GjeYXZg6T>GZ+chyx2>$2Eh3i7|?fA3p@d z#XZ(0n_+~`9_gv~IRRx{Zyk-aXQegZjqLdn1`r|waF)jP-d=F001{2n7 z_KY=gb#w&sr(ntt(NmH#qyfZF96(r|0WcVbFlh4+Z)zl7=ZAiYM@af!bnGg)I)H%A z$OiP(oh;XDJl?S#O z(C6=E*=}uvBICWS{(BAs@BKXT%Dw@`t)Qr={Rt(#s|pZ&jaW`iH??ts?``W)c?+(+FcTTtA; z_xJArONx2E^QS))d>?4P0-K(j-6=9D;o;cH2e?)Usou*GWWX)Oyn82G1Bjv*c57#6 zr#N{IAEqE6PaaxW@b}Y#m!owQ&u#<8H!T0B8yy|(%u>chL5Q1o0uy@1@l;qrg~~z| z6)kP=0#Ir5ksMuGNnaWpW7BhdDK>EM+28F-e;tt zO^6RPA<($b(g~P1y*!7tco_74{(K*xVt02pun0XcK=(mu()z93<)ft)788TSFW9z-l!Q?{lHzlF z)HUlpm}~$pmwo)W{3DZ?TX4^prc&lB7sbgknH!lojL?u98B6moK|F^xmEGv;>kG~l ztY;8wL_Yiu#&YoWd9HGYd+>BllZB{*Q(Htt1Tzgosz@eX)Zqh8K_MS^Nvj5{rMu2xh$%F66oMu>jx=V2b-iUv-`D+uH-H zhY1TKUr^JPSIGws6YCDj#Kf=+g!@O!uK2$hHG*#t;35uV zYGMJ^$Izbf2uRR!9vK$xS3zC{-UTG9aG&-FR`rFk+8Pzfh{te||DNO0UdmIak2r$c z-J$;IBc!8V1x&Kq%wf8sxwDPv3jaNqFMl(G5n`%)$99|wYkR(FXKRTgcR^penqnc% zgrG&vOHMRS_)U&fyS%!1wYXfc)@HlJ3wX*nIaa1J+re;-Y*+23DgyO?hq(71J+vQS zqNJ2R%g49_7B3Gl=RA2V3mh=}UA^MFQsUx6qoYRveZloc9lU`fJmqYxdIl3BCL!?` z#3jOESt~x_;}-1`5!zf~a06TZ;lPInz~`rc`y3$5G)8@)YW1xg^~^=ZL8=t8Ir*^U zWc!B?za}*`G;ZM|1O#NL4?!S57q+XEZ&Hh+9_LG=cOi{e+$!FGddPX@mS=uXaH3<)ud4Co4{1tI~qw|{2 z$&ni|F|obaKP+A>4fM~;lO$}B=bn8}seC4h0qNukD!Otbfn;jUqk6p_En(MgNjtP)Q_Vzr~?YU3^gxQnKA3jWoj}K{zIeFL> zulOuk!j0z{8V8Tyd0?Op(YyTz$)BVEzJH%sTs(wT1Ui>hw*EVuwHcjzoss;?V|JC} zXeS8=rHj384S3Vwz5_~+X{%%7FGla9^Sq8tRIVokIZ!f~Y6b=d332h3uU})_zFoX{ z5x9UFr)gE-d`KC^70z-CXO4}Hfp06TsJ^D=01_WmIYjNd5fBf^*uO(Ru;lsb7>0=icEX&gOrrV=4mLIaf} zNf0!0UJ}_QhvS5E>nnPbNcuVZM+4q+Gd{`1mvEScpe^#P7SP0|fZ9s{7{dY$0S$OR zS4I^F{hRM8xK*G?+y`lxXq?UWqi&bOA3wFqN<>SevJOEze{%F~z;kJA#`i4&Syqcj zKL{@jZldoxh^$&H!q4$YF1Gr?YXR(Qn%5VsPLW(h^iG2etCPcaVIr{I>HaE$mIRrY zO7pT3spDf=?A9Taw$j*15sYPX{4z54MqO}5(>`K%B?d>NrP0_)`0wD_r61qRu*|1Y^FuQ!NFjH@lQMV3#o6p zLTCwR=+C#|S6(jc;NBcUo#Jy`wQqAuE**Y<#dH-x$v%(6I*>3X_~ZiWX9&+rhQZ+@ zQI|WxyCNA+0TgcJnLPx3L1=oGS^<7bKue2I5yCqKLUW|;%uFD0>(Q^{pyI3yu0uV! z0+*DXY8~fukhQ^eWDjlt`ZUPx(^)gv8*&4#oEFxP;?C!Uw!r?98Gb2XZT8QmIB>J6 zbn3t696KWG*yDQ)cY9ap@qjM`#$TI*%f{EoYV$pg=mjKz0s1Y1)`AZ?(R>ngR)GS0 z-&kH>K-j_d^?96}w2Y(?$2T17A?=+rXO_DjrX+A-ViMOk4ZF-t{(6t}X+ne(9NOZl z>Dep-}Jj4Rr0E}t) zUkF&9&Xo&$SE4_3c`x&jijtH8xm5|xoNYEC03^*_; zsDlmPH=_mjT?;GTIqy5N1n{9-5JIx=qDT~msNuZeFA*(4aDRY021+7;OAc;sMsQ34 z%6{|ajr;Jx&=8PG5hhDLsb2ue>fce<5-wEx>xfo2h>(RWuf5N`y&GUNXlrZJ&(n*O zbQiFA(b}pqCIZ|ckkyeFW5KJLpZ6&qEj|=aqHvl}V1hLmp+|MB%euO51EUDm9=it* z5jEsrt54$A);MqY8_DJK_d0IQ}`vWwuW`!r>f_j)qpM2oL* zKB;BbCAKNDnV_Pvc#?mJjSt$mve@M(75&9a570WM_wR?r#sYt@kpHkdF&8=&H0P>e zb4yF?8l&Fch>sb7mWxA=WV^RJ((xU>K1G51`8o0e`U^szm0r$h>=~GO> z?TBP27Z+}J_Lrfd*|*5a$sK>UoCVSVy!J#y(9KP%oweOYf8kAi#eElsz2Xx-aK(k9 z;1nq6>9KMsjK*{G^XjpDSmFNDSuSqw^z?L?%|?KXYo~+>J7$DHh$!5Tr_qoOM<`)? z8tZ>|p!ppII`e5i2RGbrSxx}y(P$+^D>R1Uvs}Tea+JkEJ%Ti6@p5cW zRpI-9dT!248+aBeyReR9eOuOkQL6%6|n9k(HP3v2Hze zvU{JE{1Q!T*C{T@ZCR9b4>vXODHfCNQ!GvXCC#|!V83W)rqfIUI81>=?P;&EpNTME zZzaTij$+2gzGWPTdVULGHix(T)K+;d)bO}qQ+ZZ!d9@M_5&2ErYiBrpzWM@jd-56b z(}*+0x}S(F>#h&=O*|5lj#JaM=ATjU$L_6!^k@Ah9HMUEahbftr+$S?atasO6`jI+ zqwA-}943SgyYQL9kvT_=bV3e{*n?qY7&YY;t&9P7W(os(Exiq1rbaMYz3@caR0~U@ zS1)|lxyh`h-cjiI5*N|(ZY6BbY_>+Zt3Pw^>WVhVOHwb_xkQNgPBJsGhl{AQaoybD ztu-ZZk%w+F4Sloq+c#K}0E<^ttgYvBw9|hU-*d3FEm&4Pdi4o~&I}mPQnDJwK{1>b zWP0l10WR!oVq#UW916&RtO9a6x?KO?%cDt#Kh@nXpUR;zvOs*-$?r?Ic6WnGZUqD< zkkGBLt9xP41T#vZ34?k`d#XMGga>XXA;9s>%uK*Y<_6Eecwubp32T-Bi{@0AalQzo zgVA0V47X6UfO~*>$il!HyBNdgFg5&)?|4GqBs+_#*C;I3779f+{!7uxDB6!?81e?= zwJ5l8!3AIIbr5|=E;tpS<4AZ;4tpD|DmPd5R@7L4O62W58xTkJLXZu=s3?6{oTqro zlIj=W5I}ISa%0*aTw;ySs9)f<$(5tjg-YpJ-sA=c>d9yD3x5x@c`203ibSz}_UwTH z1P?C&$PG3&HY2GgGip%K;UW|*dthzBXL0_bqd>Rb0|kGisKK!UKU#lE-fqIc;(4IbeHxCbzjd6mXiUjs8WItt(dY=ssGBRfu+ z=@eH|e=3ymKONkEH*dp6|8jC;UsNZP2?z?(a@}$SK@3bc*Cix^0t4d{67n-y|N9CA z!{1I7Ew8M zq%6W-_L)Bwwo>l2+2s6@zqWPu`}1$yzH)P$gcTVX=3p}|yi)=E4-Y4&lbahzp{fvw zMw}4|2{(OI;l?!=fDc5UyKfFuW~CMx89pyqnhsPptasOL?t2OYAuO5)tuqna6+WKx zDVDGb0p`%Wf-gW2fZ6Za)`H?$-vWxhe%XwyZ-oU<4A*82Y=klEsZmCh>boPj>>(Pn z0W!zCUx3t;luW#6?eaZM5yNobXIHx!TnYt+h3@r&xgNl&iN9u)5MgHi+}Wwt+_wv~ z^W42zSWW{2R+@em{{BLKaZXJ^iMWJ5+o#nL zWMHE%^y>*Rnq54JY|f$mUu!M;`k#9||9h=Po=n*VSe2R~;|k-2?olc%TvD9$IcJG= z;la}RS${{pdVO+yAyicydN>``^zGkLIYj&UMMi(eZsy zP*7@W>cTZt5+7Fj?*Ipp_2Af;G#A(FYKy$$Gqgwen;RSXu*!UaY!v8g7>M_;!ScKX ze}8`vw!?`aMnmdLjdw>>b$$mKz`Z<8yzYl3iO$jsp&B3&Ro}C>uaqe*jr;fwu11js|zscQx#O zR{q%!fRjytQ=s}9*Lk|bkO@H=zt-Q8i#-s0CooR zv@M|83@}*dI+M?#Y$ynaI))ZPGN*Z9OL-yI3&B zsr@bBVrfYU=2j5Q2M%?6N^htqsy((irNF|hGfMrP8IJBp4q4&plsi36*!x-B21bKo z<0>;-Tdw`_0v1>Mcbpdgpo4i_0Uv|d`j0lUjJ>exbqgqY6KYt~ld5s6#&yXC79-~e zkuiPz@WFKEIi7TdPVSNXwZ>VR=Ze|?yWxJ}k(|$6U?cwJCH8S#T3X8WfO{FRr&>Xe z7gASu48kZNg-_tsgoiNzh&q7>0q8$wghxQ2LO}xp$Y2pY->Z{vz-@m6IJqeCc<$ST z;%P>35ZH(+#MtnioZ>w0Y0nEDlBqyK(t5GWQX;F(HVKPBA!2!KeBFKh3gw}G=YOHb zW%!|Hz=I;*EaW$U+785FHT^^7aA37NhUra$N^TY-)^&0s>Ze zqa1UOu+PWdqE7+RPwa&K-%sJsU+(8CU>b(6|34lA;@dJj4r1m%O=2XGQ-EkOJcQBfAArjLQfhE=|LPiH|X0vEyJ&%$A_?qkKoKYtD& ziL^{%!}^2e8Yq(AMZk1~U7`mGGCDG1zQo4PJ_ip5(7PceRZC<@fS`?G%5iO!**ERjCUubM;5tce1WIn3{fo?g&U61azZvs2H8eDU@sqk`hFTp;6`v#*z%H#0oirD{%Q2JaUDd5_T12?FL4KpE=r>jqVroj)1QKq->V8W`7uQ z04kB5=fm3noGYT#mvLO)1aa_t66&9AKYAFe`Te z#)p*b>})^32GA$LL~-M*T@SheVg?>BGJh3Z3yZL7PCx3$Yp1cNh_4R-^0RVws~{}I zsqtC=xU#?l*9+DY8f8jLm1luJ(}MNXs=?^!DAqawt_3KGt|OI?fp-J_cZ1>pEKp$k zQypF$D!&Fph+7lvp~ugP4aQ9j;)8XUP**YlpD!dMlR^W*sGwe1TYCf$C-5cMyil5& z4ghE{&joM$U1MccTG%?LdQq44yaxG^x>TShQ!9868XSO-M*+d2@_Y@HH<5 z8S?sqSa_9&3>50UjoFjUUQLk4pqyq}dIRV8=`wKfhtQV6*(3oLJh1J8Pke3;v$`Py z3ZIUO%4z+Ufq`AH#(>TWdLFt?T1t`I3!D_*Cr3MA-Hbg5hYr86xv?!zNH1bL5XqrC z)17oPANq&do)y1kTymP@XR05;R5Ua{WUZs*I-$m0+pNPyA{o|ktim8eK;XsN3%Cct zs2M?adgX_N8+Ji249How+D1mIp^+3hVM8mhJb;UZ#eW@V=^+r+ohbh+68x|nvK>41 z)gP|EDlFtaSOiz>^zyO>Z}uiCAN$CX30OwnV6O!VEa;$5He|K6ze8NZr*}xHg3k@s zD8Ql+`I_(z*4TsuL1MLIbh=4pFPv9sDmIXtA@@F&m0oa)Rx|bkjZ_5eJlbr6SALv z+bFt8$j6aJteAAB{7+{d;QQ3nUtlnI-};rtI?~$8&c+tREe0$gBrH5T2;80HWCrq_ zLkvrlzW{~9LKGqFB5z>7XXoZxy*pC;ab2=ZY;J`EzM3&%rBB_TBKi?R6czGR_)XV4%&vYfk zcs)aW+hGxyI#!Tg`~7vp9!KFST>Y37vOxHJ&t1nmckV3glhF%VL2kkD1G?!0d%lUK zB|8TPUNJ%KD|Ko0K#IWgZ@@Rh43>e;&H`wkaOtUZL9>gs1N#cJfEu{g{P|@cYHFUr zYJij!=%8J5xuMeg><})rq^*E>VNDRYIjzJlUHS!8A|?0(Lr*E(KCXiU`QhHU=zos% zF*s`5Nn;JO1lxWeRo(DNNG z{8!+M?~snF*QS~qsS@mKf@Q*>%yy1RzpRN%N@A>0JrHwQpgJ&7hjl){7GLUUZ;uBU z4$8%JC$&~M^q_Bm(i{BonbasOWDnN1w~IQvl5voB>_bW(0Z0do40#K@noaI!P@1|t z2g9qHonCyu81m}X<0lq~)>R;06^rdPf=|lMtq6j4!{?7MbUp*Zu2XsVHfXk#X6CRCYo{Lu8g+6hc-; zvhU;S{rP-0jM;w%*}Fc|cgWy{pS`f+fsy=-oa4-xHVC zS1%<>d0pe}{PJbJHk5`RBak5ZTZh7TWf2F1v9 z>6Qo-Uf=4%njVQ~rKi92{dL9m=cnAxs=$@4yN+zze&rS~y z4<5b8=dG=UEnfu_i9{ArN5P--<)t}W47|c;I#WZ<@uQiU zS%&u5+R7sRUWum|hCub|#SahpqInEPs;(LToa(O-Zr@m6tZq)0)o@@xa^$+-kEVE0 zo}lQCqP5Nfi}uTs$BzeXtSt@~9GZC=R9stIdsHK7_19ar_AHI+i^C1qX<6%@#HVLx z-~7^r*>vqG;!9sWSiCX)~w`%Zx(p0Og6aQKuAAtYdJp%V);)>-g% zr0LPm&-owtgMNLhA8k$J5^uRNhU+O8NQqusT`g0^&y6%Ev+dpc(&NX+S%WiYdz+h^ zvy@5-w>++2$(8p1op<(;n3k5-eCe*W*tc)qXiK#K7b<2~A0CC0^$QH_^Ze<_t5PUF)N3%_;iLsL_6Ny+}>-V+Iu z9$gQl8t4_>zaV-iOTVuF?se78z|1#45|0#9{eY!7Q!z6$cxozRZDL|V%4^(nykl&B ztUY7(f!1dGK)e|HPKBKMu{fRzW zVJc3&CC;5s%;Jou{*ZHz_E-4yMUOQv&kn2FzHoZGSMD%r5ll*5GL**S=W5AbH#Sc6 z6ko{brmIYl(a3qP<-Qi)EmNF1AE(aN)KTj-+A@e?@)$h19T{2e`->ycu$?-ZTQBu( zhHA`y$&2S-1(QwomS)*Tvh=%uf1`Eu{_mJ1Q86)f4UMcx1}3KEm6cNa)+;52=jy2) zQ<@Vb+aK%kg!WpQ|G~F9v}bBrpUPjy1`v1dQHWT>=7xl}rpPy!k|PJHp##)Z<0CxVRiveouDn zF63!{(%N7Vw6<_`?GxvD-5l)z#S4mkS9i3hfOrNOO|>pjDK2PT}9O8q-q~_l|6K5 z=>3DE)$7YYV}-0^4`byruP@zTyKGqO7_A`o_}>1U$`t13{quec)(mcwP()0-Y8PP>9Me0*pbr_ETR`a=Q;P5xX+O> zDuq5*+a_)hI(GEIUy>|twZG7H!Xx@-!d(5fK^M?B)<;^Xl;NAItYNnc*|EzycO8m)pPYw7IVB&L@*U15SJ+MI*`v}|2IsCnvA7pMZH>A zS62&LXrSaIgI<{bQRS${qx+~YsYO|7&V7D)$z%L*(4U!*i_P!u9n4t_7;8Hm&O7g< zuKw}Y_qSBEEa?HePX=#0ZjzzG=vba(7GqH;qGY~gUU<&hnyX1IR&ekwTad1{xMTnK zZ{IlCkcM<_3kwSumX=PpXEonFaLTyIK4mC$XE(8)Roq1_Stjw$&cgJ@fA8Cl(g)R? z+Zh<@R*Ev#+xyBrx{b_L+3zJD$oiSJBY zQA}YH7Zr_mTwh=FAL06P?}y9Xb^b=FqC!%s>V6oOHGe}sDKT-MPsy1FM^M!=Dl;8@ zeSI~ZOPzb){0%yt#2i|F7oBa62fIgBKjj)Q^P3XI77(1(tFtt&EplN>Rq~O)*QhB_#e8X-6&1{b(%rxt^AGu-8-i((JV!u`ju%W@b61Z`BO)4DXq% z?mIaZGbt$TI=*Gr9N9oAY+~ZYtTESD84uRlt5ozuE14a6BO@cv*;*Q!ZwdCMxbVA5 z7G7f!^bV_?oT)N7^WK0@t9|F*y?sy2j#Eo_9L>BQ?l(S5KEim1V8fl`rkOOPLl^qe z@zY~>ySPf*^(lnDPNnr*x^by?wp(8aC>!LI+c+^L)Ko7k)Uzu#n#8}hB)is!GtPd} zzc>{4L8^XI^up?&Uqv}UMy(aPgPc^MPBxqsnww0jFRzt;y=c*r?nV3> z;R{;*BP7i&`NJZ9Qkd3cajJjUu3gctH!9_b+r5?#Z!DGlIQP))k;ZVZ)*Y4u<5}8N zBFLi7lGu(7F~ZS9*BPI*u~A=oS;*W$ts>$4JN?XqD_5@EVUty7jl6a1k(8I?o+|Z* z*RNl{eR~vOCWc<+#H`vCUY7zRvlpImk{%;BcO2~N?;qO|FwQKb#cqAtSTIds*vD=9 zOC=TkeouGz!KB?h&HqXRwRK^;I`fQ9?dwA_j(hYd*Rim%{3ovSSZU+4SXPF(!p}DP z72eL{Enh2qB9AK&x3P%W%O%!g_sl%}`ts6)hq>eC#aSB3+J&8^t`;t(_n*siS(s<3 zCjz;dR$n$7EF6|(@R{m+_UxIJUCt>dC#Obp?KnXzc{evfMN~&Mm7U3cKQ>);)H5`U zeemG5iFy3Ng7XDNkHnmxKYt!CZ1=G(L$x|atBj?1t)sK~-8%`lf#B6bU1{Tx=UZGo zH?MuNZBBANckcP+HNUBgdX;O0hHa#!Lahr94z~I5P>a08OeAxq1OCipXJ@1A7A_Xr*w{!)NwrM? za;j7(TD`vR-2HsG>zVb0#L00X*-NwObr&Wxl$yd6yyLE$T^y_pHLLJGv+fgoWv8d0 zxa$|SK%<@uw(n6EnfLFn@?V^~ytW*@q^bG6;mg~5@$o9|A#|I1Hy0Qb?3Y&E_tUO9 zNvns)B>mRy+v%AxYusv2c1FIOt+ah8*ff`_c+38HlzUE^#&LcNeMEG7d%H%0#L1sC zceZ{STwhy3zMY+(*-XQv8OX$MN~$(rbRPSykVVmA1D}6?|8U1H<7ekK2dey@=q^NI z>9AY=Sy25E*a_tsTPokUINX0BMPyWOeo)4-gVW#N-@w3N`C5j?APSs8j`lF{A>T#; zhnB{#ftsuP_wPq4oaieT_M**spd{_}c;l$E9Wd?^5n@L3|cf2`?r* z)f$XWF-;y5Eke;ql=L_vdLTx?;?A8rXi1Xa-rlo|mG$S)kire+C8q4Pw{AtM=_rq}D6czM5KvH_y_A(1pOiYX)DX>sY#AeP=RS3%poE0)+dKQB zlsSvEZmCbZHRR2kXV_i(iD-94lK?1&C`^qg{J^zG_lc>Io~vx|^OFGhfJmfLlRgqI z6$PNu_rmMK$&)AXiQXcI?_*<<4sGer&3tyB@O4vBp>|$EQl7UmM)MW6>*&YC;|7Oi zTjWfR`%LW(RNGiznMP_ZeEO8A&K)=(Bk?WqhRj;dJz^4v4ZEWNA0J;%WjcSFV-VwA zy1dB9<|k$qCCmdyqiq>9UYEGIwP?(~8B%24KXBl{eAfG>rVOK?pdd&6xLCdG zr+y8-rW(IA-CcN;lhYo{9Yt?k;#BTEX>8uE8dL*V-C8&i_^6@&gJ8REDOw%BOP92E z=nj7KR(XHVpsMpkl%RfrxwK6KgWp^3y$)T-M!5zBZ8F+N(qyTIcQ-f}wRCp+e9kk{ z)%&uHPKY-u#ra}P`+-!J$K#V!nqu#$*t72(JN)E#gc?`r88fL71;4TA%PVO0vdn$m zoB3k~`O>Exw6u1(T77>L5hrkucg;InZ-DR2*|WoZgG>rdVF783T(8x1c)v@l<{vA# zAH^4*n?8H$K;%-A!*lh_vQOE1msfuCWl&62@=-_67Q9(;u^m=2PxUPDdFK}6`;N2Q z7+^~Nd{mwX+GN|>?rx*?{Mv;a)cnx9^aMFIv|UZ;xcDo&7|DV+04@m}WvI8n8_{vT%z zvc4}_qCY|D1`c)nT2W6>Uy?H~e&qg5Bl$DoI~H!*>EtLU7>dT zlD@t^T8HGo-*IKdpov;I@Re%kW5*)dWZSHV8=@m3A{6r(K-h@2*8C-BhSR?<5rOIz z6&25Q;uCToMtx;)4Z+9cNPiJzut)^L0J`)4Ummnq1>8+an)>a6DLj_QHyn( zAJzKeh&e%9_B)7Sk56d=NCzCFK2C?oCU}-3oOu3zsif#l^)96k~o`b{IdT zH`LCVN4u4hl7e+Vefo4?)Fx%Zz#}75MI>6jxxMGflPAdG!}VJ*`@WpWlc2=hyh|-2lI*hsXE)(;Lad%+y2i6Fo|g zLQWcnViX08qb{C2>R=>u6ZlFda2d>z+S#**pA%onJO_qH$1?yH zEJ3%-doq4Vmh0qq)7RO!jur(>mXyy_LQIS}@_i@`bDNX0vhrk4F`6j{KQJBz7S(qm zBeM**@80b@H_`+^GBM#qO-)BfH!puL*PNq)`dorMCgg6G&C;vdS`L2xJMlqEN=jk8 z{Cg!YrX!mByz=t$==Mp8fTuwNighY4E6X!{CZE3i@Y*Ia^#cg-k=Ew{?DqnVW(Qy2 z7>;>cSNF7CU0wamnKPU8LAlCh*;~l8i1z7hDXYUz#gi@J|Aaqx=@o_>_eehL`8KWe zcMP<_0Yo8No3&k}98q^Yfc^1z$qf%`M46m0JDdH^5r$3tLin|=yYzL}b#VE_OGux1 zf__Qv^}tR9k3C_C{8#4f^9ql@1(tz<5|N7uT zt0)cv;U@Kxob|x1?f5BccBBykfsORR!<7Gg@JJ2mUoG3-QL)fxk>2F?B90#G5*Qe` zYv)dG-Ru^`+sw=iXq}s8Ic+t6e}l56CO{KlzMk2l$i9{1@Zk(1@zv|s*0tBR0)jAB znB!NBPf^X)APwf{<$W%&(1;XaWnd8Tp6EjMkBg1HX;!RpLo1v>V5i*INQD}LEVldj z1$JwDd(LywGPI|;HsMk{1=*gaYmcEQoH}(1tZ2mT+jURqaFL%|x&!XU#W^@QoY|G8 z6iG)<&s-sutb93&W-EV>l7)OR4=T z&+f#BGu<{+{5i;f%dlww)blb$^6c=BBjn`dS4WO{dwb)zBO@c(1T6_Gd5>7D*D^Mm zXK6xH%Iv4O=)1WnymU3b|2Fpd@TvJff_=>L|0UQRcP(j!|BVuPB-Iw=_LnbTqWk1C zExT)bN?zWv^NF*otAKYU=C0^E-QPW2R#x`vmE3>CtQX%)>+NX_WCXcGNZHB&mn2kx zs))}-yNr;MFem*GdDC^(ZKSFFcUErU{!?#<^`@8&dGq@33J&~Nl(1ToqGSurzatl4 zL5e!nzfrek%fFv4y0J=%HNt|NOpEQ`%NQqB^S>m%@#eon+WzmtOmr4hL7;cwYgPtH z!JEx8i%=)bI9SN|{`wZ0oNQrgnxM<=b@8Hcq3zkLl(fD-yLx)Qyzsh}==`;64fNc# z{7k8TJ{tFX_$pe1JuECq67IDiJ7|{V82%j^ zcZSWD&?EinAwQg_Z}>|PA+aVFA4TiY?Ti;1Gg zjvW&e$S36anLLxykM;GT%$xs*?7c7a3TX+5QpO4N z=ooxYUoTxn(}RF2cKj3`87aWY2@36cZZ1>-1BA6gPS%UR3y~dBGD02{w83XmQdeIu z>f95xsN4U-OGihiyL)uh9_;?m(2#kh-!mF(YwO|C9Jds~2K^#d*Y)_bPRqtt;rd&j zg@1l+#`Mfyy3fY_7NsU*ek0YFFJI6eNqdd!=IZCGn8t10zGu&#zEBsJ)spCS*$)B? zr@L>_{Sf%mF=;<+e!;GM*@(0iMip;q^N${y{6Gb28+ckxUEQTMHMExTztXnevu|Jh zeI|QE-v?=5dGa@tXwsw9Ub=K??nm?D;-a{a(4{{!2MV96CrSbCvxqr${p>9G{{1@_ z69pyZmzS4>+1c5TAAfWO;OG11^2oDcukYU`l>$UoENy)>94{z1u ziU|3;9vrAD#PffHWcYdjaqXWUqg|3?9%KMeh>G041Yv}TpxVTcUj5IjDrQUUe0-DO zdG16+1kMc9ptPPa2i?tYRw#sIwg z{+!S+PpwX&BeX-a7PveQhPu5Ll(x@UTWHTTsP2?gHz+A6+)N(cJE-);ymC1?LN$Ym zZf`8YANv*Y`)9QE;CLFTk7JMkI=OEQ5`z{R6^oaTj}J8)E}VE}bQtqBpscEzmTl$i zoERG`;nG(I&_}g-^RLH6(AS3jQL=hXr2_zgj&A za3Oc&>EdhQNfD3IRb=Cwi0Z(V-@wieTRn~>e?ll=|7&GM&DO95tqu+h$VKenvlHMH zgcuQZ+s^6BOG!qd%d3ANLoR?n!J@yuPMbEY%PC!gWLa7@%gxny>-kz*@;KNGP;sy%2f*0GCobh1WUo0^;<5|8uHt%m^15GzQ61FZ_!nfA>F61RhU;z8^yZZQj>{gd&Xu`DpJ9PR!PBhZ;^F#X_h zdITbbiPx|NSp#$*W?q|vRbhN^Fs16792p5!_RBk$=yGQ~d3p8fYYo}8C@{1S9_Sbv zzB9`Lu0V!NqwIbEKIhJDh{?&xU+?TY4koG?N*69co5vivn~zw#!VGr5dTHbls?*{` zH`GLS5FN6z$vw{@8Qx|Qg*r(@{sH@vw&mN@l+1Qg4|&Y#z$^PL$8NlCFnp+e|?F88ntbE>PXuHGP?yo#VC2@BGL-Gz2` zHa5Q=cR5JNh;d<#s=+upu?3(mN&FTsqnG4eRH#@gm)0e*gU?r14V zN%v|M9gBae!&R2K8pt}(v$_TkY_sC8+{P-_*wQisqL`AB63W(oaTim0`74BBM^4jo zKw=u{ZvvhMEX_jN)BE#_O2DEj-3=XLO@O$*o7)Unk=eO9B-qfr>widjgV3N7u|!&~ zQ#a@sFT15?WE@`MGuYWZeVJwV?wP4OpN>m}V%*8{5lieMBAy6&bfLA8vIC&c5^vqQ zg%;@5FzTSafau6oI2Br~b#)nhQeKr_hLk*W`}W;X$L)W|bxh0pFO)0$%0GrY4Jx>- zOk(K)6PW1PpF9B93yhO*)Zg<9y<<*);GZA&pIs&D6WU)OG7+r`@5!B=T5PVa)7TM7 zQeG@5U^LX$zF$${55^KX z8-9ol=>Au(Y+5ezadXowyNsZ#^jnC^BT(nauq7P#aY4t3n0o%~*}Wh!KE9^TPJ{lm z&B%P9&*Ksj`cf5XikZ@F)@4G20ThV+0FEd*-XKGu^tRTa|3o=x=zE%ZH#Af+AmHVF z#;8Zq{@!Rj?HXgXJ}*1+eEM+}Nv|ncwi+}wKR-Xz5~6HUdHE%8@87sOIyM=#p!7Il zJAR|*3d6(CXJ==#H?6O)!$~j^V+rmeOFKhloCmDSwR_RgHWx0Kq@AXuE61ocdOOD~i5h_2Oj4a)711{ca|vk-@=dz{h&I`WabS)&7fj;vHF8S;J|9)|awg ztjvSy{n*$zv?f8N_yCY0x7f5Pl$5JNkwQl%Cj&uOjQ4|uKlb$+NaCpb_wV1mD|dF= z+=mYk3H;_pX#JN4&;xbIrYAYNxVW@up0z_Nuy0|XTG+hQ1bHszn1Y&G;|s5!_-J<2 z&6`^?-xFg%e2+lkgCvrE{q`D}{nj5hQpR$?z4X7lq{AIu;?QmjI^W6TphX;)PG-w2 z4Kx3rr6ou#VGWbe5>YEb0fy&ZHS7?Tx!3lYdrv$+Cug>4`SW<{`*4-W@-2t(rjY6m zQKT%9x;T|iO$and`%|me*s)rr?H@ke>aljr;@XZ5s>tz^Ba&gG0z?tfjfvb$^~Csx z4^KB$7Z-b-JNFI6=`r;3Z?+DS#lNB?u@R(E3IIJ(B9fVo96j3pCg7_W~}@e^JgGIa^IBf)*O+_-Z~qy zPD1}JzVIB)EAMle+B2$sIg3v8g-w@JT=i2D+Cti`{Ra;&bYyFp z+5peU?cw0$*(lbHg^;7epwkCU}PH4ZP+r(MT=>P-2DZ^+r~g=HS=bvTfV>kF))7skF-0X^yh90d4zlP|};mSAEGAjIp>H`f=>uA+K zrSQ1_Z{Ff|D=VwRF8c4qfI%)|5FnEPB|m(v76gbzTVRX!92TSVQ(5nto9*t0)HdWH zMzB}4b0A7~*VGJVC0|xiRsHb(ed{Z>9lFS<^P@ZVRY3yK&o>r5etc!FS@!EwtE*td zmsQYwC!Rc8R$5w`pU+pExlhW|5IK9+?bXfOx91lZ*+lF$u|LrnGxHkC_4^436dZH) zh6@b%0&vh1j-%n^eM09HPM@X>4aUaY&B6k7rnNUlh5^E_JDdgwMNS0 z4FY!lGZPexN!~CADDRn4E#u+r+<#o7r_zj=R>3vr`sn^Hi?-8_#1w}LkHT2@Jih)6=*U!&d5-^=O zcVt7IywPE>H{jUFPVy=q5V%7afQWOCh^XiS_9a{zh0mXlL)M>i?btFKvwJ%jje($? zwH12K!y+zyh8!#JdHtL^vhTf6?U1_qIbG%R>e}Sjwld;Q1AU>}+E^~d{|-vzTV1WLEBo!uqt4wpn5(07ga|3Npj4Ih@pal0<11iOv{g9_TtOj z25Z4-Ag)aMmYJBCe*OBTqOa)!VH94-WFhO*Mn8F~o=;w3nIkEB_{GO}+}zOmqd9D| zHaAbIWpr|OcI|&r|FOjN>pHYX?t<$L?guwfav%z#V3K2-1;(^I!pp~(n3x!yr_pqG zmNBZr_g5V$ro>r`D`9q=tb7gv}{*v*@xATjdQ1h9J=K>#8-+n+lp&uSMW zauM*PF)tuYCsWPpGc2c)O~3^z8QT8-E5#Q&@w0ay>J%!slUN*(`mh2q^BK>RWXjwp z7S-w|H}Bk;f?VnR8PP&Y*PR8`ehZFhsO_3IxJgVcW*t zj%V4V{m$Flj}8wjP6~-$WY}VAY6_uKags-P$KJhaO|mPGfn_o?GPLGgot@7{)c;3; z6>a-lV9!eT{=2?vo19|J;;?OuJ$ZpUFDoDnP{YT|Dxdm0T3kw;swF!Y12vMj%T+dsi&<9?g z9XdloaXJj_*_CjAvPpT`%gGU(v&tOZd$_&Is5?sklt?M`*-a+D4Y9K!8Q~>sjZakw*)6zdN z-*G6s%3udUZX3z`q%!K}SucuA%#a7B#NbdT$Z^9-7S1O;vANAa1{OUw0kyTM&B?M;_UsHnQ)#bo+YhODj7SCyi=|TIdwe+ zd52C!MUdnsMCrWm7!@}@SA#|=mSzJUWrL+Rs%dawHNNLK?de&RpYMF};xUgSH+HbH z@@&Y5(u5wc5Lr}ZJpBBooa!~s_?3t)_cu;o>zXfi6fovwS-A0eiNy2Q^BLO-N;jez z75uTP8aC#Pkc&Ick+S-T&m^pI&lOZi&(W5Xv&lNl$$o5P!rQK#K#RN|5Csgu1_%`vl0*ILNO5NqhSA?h(}MwAqLlGLN?F{%dDAD#Tu!MEpb0x+|*o_mz?F z;N;I<<-PJPwV&a(p67*${GRn|hE%5H8!b|%|K@C;NEQ7LV1`e3J#h^L-fmGxT{$^9 z zLJ|ygVREbY8b_bM4$ptq+-+e6uG1_e8MgenjQ zy0x`6SQMikJtCPz;l$atPb$jz+W(GmRE;>=xgVebYl5%9*hfdFoy3)Qr_P^{mOgqm z-V+-NJONbvP7s~v$UX)A!%sk~hmG0&;zh?VFP9)3V7wJHQLHK!MaHuv?VeMQDw0^ORq~D6dr$S1Ck%%x9(=?79I!!- zv%_5T7H4&WvrYHbsdr+IbR0Hi0)tP8ZiRh}35kj2g&sCTxiw!t5}zti9wr|zXb=cm znmv5HAJVdm`@!>F-Qc3q)6-|ntBkKF+QiF;@#b*mop!GvCK%6#Bqo}{lpA0~}=-B2VPNaQ6cg5uBk8ToBw&qEs~O=4ybEapXfa z63WK8PDM4+b!$>WDg94uffp!PM7CqwlSW{y2-^qh>CRodAX7!|m*)01t4lH35i$Uw zAy~h)Dd4H^uXBFs0qow2b%$pEbuCSv!fb?7jnBeQkbc5=^O9VoeVrV|-w_eBme|F_ zN}?b?R;AMaDcV#s0ydfmVFU?=J86+#1u)tuCt2bS$^z{RT+TzIYi zEH!Ug}IpZUB?Ku$rc*;x|5tT;GK zImmG(ogs$x5NY0$hDL-QAeA-#bb?eI%Tv={ZCn4TF6iNnbVrkC-4 zN*@0jd&OCCrm7J@7Nc$HuzFv^bqSN#?rm`0)xP!uq#w+jW3%-MqX&8$O2M z`80ZSlf*rCt|NzM-rb-~uq66(!l`qoj*lH6E^v&`yjqDGc%tTVzNNvzkmN8 zA3w*84z5d2w`p3XyAQP9D57EWvZB(5fA;^(D*t~pIAfv)`NkUSHqrjO60X@xiIaOKPykvg~FL~P2F#u-cHd^ zv{?$icH>5Kyl5`CKCtH2Z||7Fd+WLxatdhge-W@zN70uIb&|PjDyc{si|!{J_!OeD(a9^C6TvF-y-emrrdaVJ6N)uu{V>*buxq|1#;cqSu*=4+DW=Cqr_J!e?D)u3V*W} zQTvcjy_~=u8bB}}(7Ad=;d>zx|8;oeoKx#xgXb-x#-|Qb!1PxvFGHBre@JBiY~ui9 zAdD4KsoTV5I0InpAmx{%(d+3~YCmmF93C{4>!lvd?bn6|GMmyTU#S??|vl=3`alCKh-@3Wcp8fe?|hJ&UD2E1Fk(UB0OAANn0WubHcWfS z89MrCT$w6xX7#UK=%l{>XQ+#CA;JoBA>OPi;1bfALB6pbwDf1WxFq5}Sc1UjLs;qQ z?G-LWi_OE!i>}=Xs()QRG@9TNc#~#^-tVHJAvq*$A0BCJYGRYU%kH+%!PG^HcrDr*^7r(|fEt$gkyl_4!R7zBKzl&VT6+rS=3pcz19@ClNfv zju6RPGq9=R%o|P=xp&^%_{LDV#=#uO^S_(l;0GE7mk3|^gZuXZ_#DIz9H{B)D#PwN z*9lo(?gBiYg2KX&L>%rzsKX9|=?N4%Fe@Yr?*C;8+PQx}^tU`%z<2N74T{VgUwN(* zKtb-&sO>+Yd>ckvT#yQ88`n!bahHDmz-M1;hWw-dl3@HyL`L`j+Zcqm{14vEyP)sw zz;MsU#>5;wUoK$&a%OH03K3Qo{x=N5p@kSCsF;xaB8|E1+73QDhw4RYwKdxfhG!OQJ4O*T)yLCGRW1R#5iHEn17QwG7jZnQ`8Cqij16+bb=Dxb%#`&zdwW$$YtX^&&uex3kV4G z1TG7l-Vp#^K8i6lEp0QWSslGdi}Z35eRkuZEg2WvQe2sLdd*IC^+MLwk=QC+R>ip%-fGyOpuD__AoIkjg-#H;B3A-f2;@;I!rQd+n--$Yz$3-s>E&Vu9@s(*v) ze$@|h+Gj5PQi$LXR>9>h6*x8nXUf6|7jcWu=>M{SKV$p0=^g08w>Sv{zd>6O%ufmm z*WkJ)D3VmCJ9DUtP;>4lC4B*xhN*|BL9HpmEXo@7hTGr?ubk1m7?Qv|_E=A;tHz3Y zw>^CH$XYvy_EImw4yNr^F#8RmENHRcr)UN)gz*nl)HqdES6h3f^19-#C>pNhtwvPA zv!`UXe}#{y`P~*Hdz>@Q$jDIe&XG8A!u07yEStLfH5TP@LmK^2Se_;)Cg2}nWMi|? z)D-+L!iM^8c&yMl@S)*hVLAHw@i8$mXwiW#uI(kbgL*qc+@k*tcyRgmr0QAGvpT&m z16RDQ8URiEtAnabOML+Q@M)DjHelRg%V5s*WUBn?Yw*+l;TPXc2IRy9m}yT}zWB!v z*ZMZ@?y&a82(}G6Dx5de0vN*yuusyBk5+&8x?-`Ax3XT)e9OW5IXZG5XWh5&EZO0I zjLzfy@nP&ZNzy1y1F8w093h(;$e@oa?3pTS;C49F)y?G?308UEzI}0peQ=j#eOGf( zizVZSz@T#Gj8$eFe+A+De&7En6}1T`$utiPkqc=l*ol` zHpg1p@)g00`32Zp5SABZIpGremu26Jxv5^^+3nlg$QsU3HasJ5hs8))$vigIC`u;i z&mdJzINrNE;}uTJ3x}PD6%5pFRwy|Y9YY@7ZPbreYf$gl8$CTzJUqAd&5`m4Y|vV7 z-fW7ZnhmiBaguZNY+xQ3ge^w=#Z3R4x~KLipb(~$i)|%Dk;C&+r}krL$k510%V4^& zSw_dFd~9qCrl*fN72{v69JnF;(B>@^3j&pcT_ULg`JcEkcCQlY_OY+#(-(p4~ z-j%0t8qAPz%{E;ITLLem%5*j=*|8AbBS$hp{AGeI__ZQ$2xM_BW2f><48wSo%ZWJ_$DJax)GC{%(nTQ zv5eivy&4+L2y<#pDG#=s88&<3gL!E-H>zjCB$W`;rsq@sj}*V{z7;d|T)#JxrMtU( zhq;uLl*)PcL7d3tt`;^)!!khHySd4TMBNm&)z{mg#$Sn6pHy&hnL>_5e)x(^f>UEO zm!@F~pbQnPnbJ4_NrdRbP+0?0BF8=2EkyAWOV_6`J!?D?%ZH>&VwtOFL7a6Mo4T{H zqe>%e#vwSZ<&S9;WG+4m7@KXTo(rLrI`ego%%4fu5^=jL*FJ2bH+1ZvNnYEY4$Cc< zD3DB{@>B5K`Qg7uD?Ewgpj!&5n*Ch41mD=fA+{G~i-I=KmG@|=;2d;dGwJRg?DYS3 z5aU7c|JescdZ^~`T&nD1L=&r%bq9JpsTD<9x_3_tgavjV!o6tT1d^+exVXmIv&Dsl zJT8gArC<~%(77U%bv4lkL0$$1{rNol9Sp!CAamiQgKrEcDxl>~qf{5Q;C3L;Z$TJJ z<`)9Iz$UUb`$51Wa3Ok?iAlgbV6b-cB7ID89uZJbi6hm49mqF7Vg)AQm4YqD&R%;tU6R3g$aoZ<>v z#As=0fpV_Xv#GZit-|-0AwQ;NC9`Gg);-70Z;^%y|1j$afv|Ikyu}eh5;|0oll?BJ zOOB5}8SnKP1rgBTISlXdA|z(?nXm)5=G`x(%Wwy@LF+_3d<-f>rSM>@7{?uZ5D{_q(m03-G&}{kp$Cil_-6s5=hqfH zP!7-;LLuacR>|3#H9|TfS(NZO8DWVDhkSkvM?CN^4-3V7FJ#fzwY6~cG@FsyfCdT_ zI!oiPD$(3HgOYrt>nDwFAL;f3+>8tiq=Ot5)t3Pkv!qrbhZU{jdN#(gl82fxzNaCx zw1q~P@OYbK+)YGFG!A?m-EP5PjvdY(SY+2PFS}tuo>aybg;rLz_S*zbBq;m~W9>J% z&q$tDRAMH~GoVWo64+zg)88KmCAQD*Yo-5>@p01qNw5o%=W*MXZF?nVP?JQ?et;Y3 zAm6U?h;j6y0lIm-$NL>|F8 zNK z$t5HSyj`cn&D$F;4nfh?m!J5}UzQ;p9B1K5%VC82gXzyT{58FCp`)!cTkAV+>4N+= zP<2RJI`J5_>v7!{l$GowNJpvNQBe%GOI!BXE|UC+IN0FywW=R9{iTQBK0wH8@KYco ztfMzJzRjYTIa&Jb*;aab*RH37NY}9*>qD@pAg3bQ8=X)nvXj?VmpmLbo0^*9m}hUZ z$*x0wW?+e8a6Ah8`6(mV&jzs@!P!g{GRTn5f~hAckA>u%LM(v7>0%_giMg&lqK6U= zyQY%fw};rHsDfpTJo+t};kjopOPs8&mvNT8kj5bfjjC#I9vn*daezY;xBu}Z$t12@ zH;3ls@jCj&-IP0Pi^d7!u>Bc$)?hpa+`A+IO_&%Uh)@DlZf|H%xfBKyR0>~9W*H4O zf&JFvpN+iEb?@Fm5j3FwzU3;*#m81q87hWw=oOlM^~SnC_Di&`C7NzhlJ8%F1Bgpa zkjCM7k?6I52SA6&$d4cGbpSM1uUr9YYRx>$+Cb4*hyqbsQeuuL;48~XUhDVyisLb! zlReRyI2wyDUm+cN{?HG{6*>>wq7Zn+6BGMkVngOhii)bE-Hxq?TYJE4?(X4{tCdC> z{pWk6Y~Qw<2J%Mi6y_pA(vc3AG_z^(QBs81ob zQpYe7EsB(Hv7$g(h9AjkNGY>t9E1pU4CxSK<>wZNso|Sue-=R%MwIp)#<1)x6ekxk z^+Oh-%BRnS-8%2vto`(6SeRCo0K;Z9VDSIiT3h4R92`OU3i}ry5xcbFibBDT;gtBx zx){U)ZnVW-1ti!AYeZ*fY2Bfez0Nhb6~hUOxov+-RLd6ChrD|rkfUpvWBgzvR{$Gc zT?`L-unL$TdkQq+NO>`KG&Xb~_-FrphKR%>{Q1BCV4Y2aHhsF|K|m>KKxY)y6`sgj HUip6j2uaMv literal 0 HcmV?d00001 diff --git a/data/scan_perf_zoomed_in.png b/data/scan_perf_zoomed_in.png new file mode 100644 index 0000000000000000000000000000000000000000..d9f63ac53db548e069b1bd704d4667118ad3b304 GIT binary patch literal 22583 zcmc$`2UJw)wl!LSP#~c|l$=qb2$DsjB1)1dAQF{~1Q8@i&Y*~jNQOcLii`phBuS7U zl7oOm0SO{lfKbGLRrfi4&bhbmeQ%66-g}JEHlb?Q-rxS#T64`g*N)P@ewFe#({Tg> zL8*F8Sr>sItUw?L!bu6?H_I(QBjJB!uGenfMf$${zXiwS}-P%BNJ>_L&bQD=4hqscLu>B?NSFc`pbJ7Zi zQE&?z@uAW74tZV;xp~waTmnC~aX$y^SMEM0xHx{nc5Q_HTdJ+CZBO@?^zDsQ+mt+g z0SWvWXM))`1{$}VQxp|<=Gb)6opf<)>wQX)M3pBn720rFn!7KVm~bq2I(Ipb_?XPVU$&UyRz_^*FJ8t3!xuMQX8+Uyyq@mTrR7MmgOSK~NR zyj$EiCBCnad&6(!B$}=riRNZxWZcXdZ3s9bLQ=Iq>?fx`mwMs;f&ioOu;D~)T^%_%iGzcKK5Or4Yg5l{B|cRC@kqEH{c>lcM@^Ev*Zg0mvdGhY< z+uOx*4}LI6OcE0l!{xfVyKiE##_ocd+{72MHyU za)Gn6v&3oyQ-e;HijF3Ra3EJJ*H7*A33KzQ*RM~@Xs^|7TsJc_E3^3=91?PhIh};< z%$8Qr?q!|0wpjLty`S%42T9!M7#i}Jto1IxCBQE2${`mDw?s#7o|&GWzVhzq+pDo` z5h!+p>dF{qsd{Xu;K?703%4qrvwv14B_-_}$4BT`XYSs)5nE%w6HqLD6i9W~+B$po z<7VjOd(RTitIX(`rzpj%scC6T*v@#tLbqizt#sL&^*(#QoqSM!JL{ZvDV2^RPbPyE zcBVrhC>#nF$KX0(wZ9AxOUlZoPI3ht9e6H3J=G5Px|d{OVIh`H?v7EYJ%k=c?NkKkB^r(V$3U=0~@oH+4s?IsZPx+ z?hCfvEXFx0M^i@!?5#WwYxB$)*~bFpL+k}UgPP4p_pi9`FPR75%*m>IuxgCES>(g+ z6c~P49>L0Z@xk2&{{u|MRa0G`%WDv&0|hpj*G1}h4>t$22H)h|a5$NFK8xJ3JWJt@SVWEaO925!InTh!I>zB&gn=$UHp2|`$JO@}=SQek} zCPPNJqsGUl%^N>CB_}7BbiyHZ@tLk|8rvPsy-ehl)rg7;4_)08MY@&muRNj?G%Pl$ zbej#j{C`3GWh>=+U>$KOt26>mxG_4Glj(f3%ax z1nWfJ{y8K0qy76bOfTvlTd_Yurv&_JrH{rb&Rgo=#2gw8WjQk)W(YoCi@uGVR~Spj z((381Wl{FJ;&aYs@l{rBpQ*2fB5q1%pT+o3KQxyO6}V!*A}jQc3&% z(4V_-iwFn^G*lDLnLf7i)3m7w!fz_{Os4inWkwPSx8maBK|}xDP5z3_!H;_EyDd?) zCvPr}J%9eZu&@wzYkq2tj=sLKoR`lJNl8hSw;#NZYjkvURJu&r#MBhXJ}$hp)s|oE zA=bZhWOAlvC`rsVM=RrcTLgdJb{~nI^c+zytB;^L-({Czg`@qA%*zp&N^#hnKIA#u zo<6*F_rG^T`ZyDCUq5Q{{L;bh=F&iGYbykICdtQZ51en^W43vmU}3@Q*Ovmdt>u3ZnFb$I_^^_)qSVr+6g4GW7# zMYgld0Eu2k`ttrlic9`il#|NH^(4_NW+zUZxFqx|M>A#Nb4qSic4srj1ivZroF5w> z_1u_@K19AeXS%z;(KDdHrq}=U9~%Q{YK6&AUTcM{h|kFd1qBUZd(VHWRHkB!1p^$91_)>{Cuo4iy#EU~57^5=yV&Q-ZKoie!w( zWd{Rwb@h3dfzQbjdiRC9{Fx*iS_Ev>_+;AE^?yceO{h84JLGZxal3Ju+QU`%?zG-m z&=LDAV$E}06)JxgRe4ktnZ%dDR)7E_H-1JW>0?;RUTg$)Hn$5DZs9NLti(Y^!B^?C zXA57w3T8`_RTQ7IBNAu95-ie6=yc&sZMx~jZWn&h6?i3eb2&l>DJUgnc-Vc7wKv@G z+mNBTwO_ps%|Pka>iG8bhmhrMZUrUuiH*fXDZeWkQMWhp{1uiZ*7@=Rj*Bat>@>=v zAEaaAf1Qm+qXCfESX=jXLt(rKIU}C^LPt-}(Q4%shI_KWP`DMFbv0J^y?OCh;*7Un0cMpGk?) ze0A>OK#nH9IK*GXZybs2uA7$LoONaQZ!86r+w}#@q|pnT>8EmOqnHodj|46W};d$zkV>TxM1TBrHR^W@yd@$vVQ5x2j6jKTKYOO^Lao+V^GhWqM|L?UZm z){G4L1(}diwq7woDNJT0>(H#W0chIOcfPa9@3*?O6mE*IlS= zbhh;olnJDRFp6DXW8yOaK)qXA*iXU(PI>QDT?QWh{*iS44oTgW!`+pV!4wmyWuauW zqN1XJbOv*^*q;a-eL%ARn5?@fEuFZ0d6m|KIY`#|0BRHzI`h->2-O&0v1h>j}Ps1GWT45F78c!g|i!!kWZ!n0f+K$0FWrU zRhh%WA)ISv0kEeiKsL}v5o)phNby9TUhWB6+NHFFn&)uAuRU!AQY9P>l$DiJB%KmJ zFPc7|)Sy3WRBHX{Wq-e}aZGgdtKk<(Pp(Q)pisGNW@>7kf+e+=T+#*|U;o@OH21sW z6l<`+h1rDsq&I%qWq8VW1G0zXkJVKu(dv|eyRpW6ci33B=)ZmYmREOovi3qn!r(pq zeV_Vp$7vniqmzT*w6)DmxSst0#Q*babzfiK3zUq=)<4`s|=?AU!=630*$Kvt^6~mg>0ew_M8obd_ny-nBI20`3ibZOppm<)?Op!F@ z$nqBh%;-~Q;C-sjR6y(K`b*1~0lQ1Q!xfw9#S8V5FLlh>RiHJ&*xnqdcq3Za^it@S zfuUj9@ufbU_qTEnw8t9-E{BRSn8Kxb*aJJb^Q@PTkHAqJU;auTNiW?T5n?)Qd-7ok z^ar?MtNNMy6A0;GUj$;;{nuMY%cO@r7ymx%l}pFj4%Nh249l!nFxf%|0_S} z%!k0=U;h3*Au-X;$?5ah0<;d#o;`zpB-;rBrrcpj;Of<@b%S&WL|~Z{GY0B0`=%(J|K%$9p|BF)ghqVGKIy z^_N~)B^@0d2L}fOgO@2O%taqRe)RG7Zf$FmvhP=}|6KQQn~$Z?tPbD9l{3-Oiot4N zv2OF0ByjncdD4BgwY53o)%G@*nhSlry!3C}nE(F$;lqcYD(xY)IXf5o4s~^Da6Rj_ zsVNqOw=_iYvVX1h9+;e*{Q5QZW;XG~i|?7GoH5lMu+O2Nb#Qbnh&v8vMjGyg{Q&7h z=UI^cUcJg$r57@GR}SCHhi0Gju@^Q7I%EjQ+hk|LA@=jWN(%@$z%7L3fn+MT zlK=QIMR7jqtKah(nb>(%q34@Mv`QPQq!x6dlN0$<$?Y*F9?3U%hya z2^WLmuyJvEIyy90j@g`?>C3tbc&EF&yYp6fSlH^<>J8|5(=K`%)EnT;Gfohhq0p0| z!ee~Jv+OzPvPV)WiC5d!GkhDf1eQ=f+ToXFagEK*&4YvDyu57#191jrWo1x*;0!;H zoxO13f`o*GlvE{T|6L4Eq`5!V({6(Qivjs~)T}-EW3~Gd4C}uK0;))H|^@-xch> zW^yomOtGyc4t4F)rAx}nErA3?r%yyRGz36%}dmr~1`B7V42 zB~b$GMmKMs`Bd6D(BGd(?BcSvyzG!RLazKo0*6>uwFJ&(F%5$y2%NOU2GLo-kEnmK zF@X+=siff)I0RZr1gCgW1cwN;eTqVqlfJ%mxAFbp;9%+by9D~l>if0}T}d}@-h}ObIh;cE+BL63+;CE`1?WySz5Sp;|4A7 z_1B1B16m48r9U|?NmnBfv)r}5+WPu0l9Q9;3A9aGx zz&M)Ew*xrZMM`zd2$V)C?kPay-7f7!!wF_4|d2VtDg5j5Ie0zzL# zM<=Q>`w%5XMXNw=KzlY?bW7?uV^mZWG=Fx37@CAFVw$EoqI=DA$_Q@Gg%GI1w~UPV zUiLa%xe^vG6&oA74Ozj}6>aV5xeevgp&{Dh8r=96{GCi%|F=*jp=?8>%X-f)Edlov zuu$6!R1Ffv=DR$|-`WVtEz|dMd>BGyXUYzd5sJWxDxkO(9Es7)O`nheSFT*Sa*`%d z<>0TlO#b@d=rw^zq1^rdgNkm4jEsta%RJq&f@WOyswKpHRn zEgrgEJUts&>Z=8<%n^o1HNXi$hYDs`P{q7yRK&h%%e zA*JVGkBr6e@OC00IIer)0#Bbl4UhslNJXw^J9~R=t*!hn0{~}-vA`BhJzT4U`gX57 z^#WXX)^j#FA1fQ1xAoEl2=}1vDPNBKjwoZ9Y6QuZr?|$>PJx@@H_~6e1x|MAXV5_qSFh*#q&4f$RIcrOXO z1HPd61~Sdui9uMD!s!Bu+SJ@UFffpI5Y;s2MJzK@YCndF$S<_>S}9ezkN#)#~F^ybtQ?xuJ8K%d7{iV_BQUV^(RO) zW)D{-jIXPy)wnM`KPQMH3{HwdShT|iJAVB5Yh8PwQWI~WP_g;>`TP66kRVz=zdU>M zQo;+=Npf{HwI}Qf>4}N$g=3?mTR%qicq>ROo2MbznP%ogfXW#gABT-BR#2|^c|V4k zsm>2Nl&H6@kQH+^lH8zufsJ$e%o&fR{;*HG!5nZgdtS0QMOnC-@~d9BX~1Q4bac^Y z%8hk&GCQ1-qN1U0YpAOOBYF=w>DbsFXzS+eL#(W<_@=b2IT<3M3Yz=rwDvCw|BcGCMH&sdeQUTMpBrT z!ogr`>wNn&Zs@UIxLl43@(}cC&^~vJ<_;y>^ytWw(1s)IFR#Nm)Q(Z53{L1V?$ZPj zjk|xa42_w*tDBpEfWWKB#>ZT;KAV`TDoA%XXx!sVO4gv_Zm*7C{WMZ+n)PXKdu>3n>up*Z>%;@7NuOQ{ z-S_l#fu`5U$OzKTFB%fW#0M#M1Zn6xmH-M>WMm|Rh((|>*Dv6&A3f5W%z<)LI6wa> zp#dQ9$+K$e>PrT|&@f87P8XZLx8toTkRd&O{Tu@!PKq}gS9RDA?>MdDs^bJnVt*pziKdh9Fa$AuMMg6txLLkR1An zz6fIg+w0~GveU;Kw9ZAkx^V@*gv?NCxgJQnFJH~^qazAl#DWjr?)8W32*e3 zOu^vj#YkAV zWM^lGNAqyV5p<6(;34kczrX!sG^;l5>C>k-%WR<5En)f_6?Tlcc|#?=5C{p_%)lu> zRm=9<-Ly22la+-`fZf=rtE)RDb?dDanJM6r$;+T@K!yS_;28>0)%5KH(Um6o>Ofe; zGyf$KSlHTnehbezXc(__X=`cOhpzKPRA`wD;S~PH5i-hB&vXzYs7Qs`W&QWR01^5o z)Id-`K)ekcPi+E(Cvfkew6{n*wPapTe&M`sv^3GAlO zrfGrWcM+)tB`!4a?u=V#`LP$-n1-au+9i)93xEfMqm3PMoUn|TkC7#=*av>#aE4~ z(-(+&@`Q?v3~*g$fv=w*yS#6e%apuvJ)m*Z$%cT8ca|?f%2|AettJPUlBF&vD2UU) zDsoJ^x>3j<#b#l{6}SL7{=Z}MRIDU~wzZJ3dHsIf_T+%65CKIf0*O*4o_c)jE47t8 z`g*`yiUL_7AJ;?B-w(RBD_is0Q4Fa-k=1e?(TIR! zC&J#kRm(1`eLDOD5rP%KK;I?l=}-vm7FjWxI8@*r{0Rv|98;6Q>EK^*54HjCv5Q`T zapc;DKoEMkx=On*Y6Ew)X-a

%b={NRC3my-ZNc_V@P(*g8Bs+?kT55HLA4m2uAg zq4x~*Y+T&jkWbYsS;g&o^KTTeu(AT)?%q%@BO4Y)G!#a$TS+exq=sPFo`M`gG|Lv_U2}wzL*J;SdNwVHp$Q{t);g2f; zi8BVSN|`DSRb^{qv#_+3w`RNurR9I@x3MKsC1SvB$$E)MNqNFv!8Spss3UDu=OaN& zcIWnO5GqW~%!=K12#y`MrIY3j{Xj&MZqhgx5)L}nD;@!0?l8=jJ zK-t{l)k>9CVy0jewE~0)++;$Ac_PqMuo@#px7vQ#E)VAAYGug#?yRqlS2i~_;oA>; zdpthJ*@*HFOv=6~xFq@m3{r&yFp{OYiz-J4oMW^z?Y)e#;aRn2EbLT&=#&u&s6UYrn)Hd;SkZ+v7Vzi{5rRPJ- zHa0fqP!WvQg0#g#f>Lb9=;W%5TpM1_ef?VBz`)qZC{@<`dG+wek2ZrCom=nj>eO}w z9ByPlPmfplvlGl-)h}iQ9I94kr)X~E{mm9grwr` z=yNqlhhyN;0J;N85tth8uM8KBzqKZ>m>rbO+uy&WqVf@>j5;Wma8JRN;85N9=@V!| zbBl{RADLac(&gR*lwp-|N0XkQrY?n}ffj-(jCWR{VqjSy@{wH`Daj3S!(pu#evq=x z(r~Nvp4Vj?8cF>FKU2#Ig7E9U12^N(miqDGf$DRhBKzh+2(f3j8SUivtJ6DJFp0m?bm2Bd9gmEQ*w`-n_Z@y*n+BY-O}8_gUeH zT2ViAA|URWnwZd?Gm*AzI>tS!UFlY|xRA<9CV3r8z*!@4MGOBaL-UO@+*VZ*IOQ@fYB3P@8Bm#XUbuI*WrE^B5=p0AbxRkj5Z1w6Y zM-lOzJYrMo_Gtne9zr`BUYz227O?b$Uyy6opeLoEu#%?(t)#itG(_n`5^=G`9HMdh z-X1Qjs;a67#OJTo_dzc+t#nR{iJ@!_iC+>Fqn&TDHU2cBh{edK%|Uir z=Ya93#92Vi0Ur*eUOD$gYSZ1FoyW{lPX-O|I4w2xI-~}6 z{~b;dk*BaBtELwhy%u{j{SS8YYCFK@0WOWfyBvgH7eb&*bQvqRZww@`v9U>=q+wtR zB@&{(B2B%4Kl|SJp#nqC#oifE37`SJLswW-1dZwlsGZ5qw_RLH{D~xNx>J{ymv_4_ zu8E^NYZ{&8ODif^rJPffGs+2jS-Q{K#ql)}kY7KicmM_`AQQ6f2Xe5)+@o6TdDUbd zK0_|(kH94Gvk$Jlt7&oibnvT<9r!koDaS`gJII-_-@2EfJ?NXr&dpWN)>TkwXlYUI z4Ez_E`(;f*7%WCUcKV|$!Z$ZXc-3I9#~0MG($Z2$2|#@VcnJX64j@nn2_+<>Pu<-> zsXV`Sy1dkWfX9Jj?mIm=-WX1ux+0`*Z_i(04(<}Y91R`W2DDoxaJVlx0m%#(-h`FC ze5Sy#giZ1;gHym<`?KKSU|7qe^=4FkeSP^6^m{s6Cli*&S|8D8$US_SEC$pkxLeLQ zh!wsS)%OPp4zeaN!o6u!e<81uJSrwOb?1!_K>Nz?{Sa$q?~@<-x5m<) z8n66mDO?>wT<2sFe-YUfln8qg$)l0j3#bROC+Gos50zHWR}7pa1p#FYsO@FlPI?S3 zVafr39qgRUg{h8?CDMs5%~K{6on zgJbKm?*anS$SS?vdfeeQL|u_T6@inM5VL?-u_#YeLn4uwnK=dgbWWphuhfql zuV~Xkwt(!%*S=`u#x*aIr#}W_w9Qa+b5?%7fP%u*`1q}RWoOi1_MVw1V$~-rrwTz* zauvHm@MGej+w9E-()Tv!R_oCB5a^R@en2uicp#{d(Otd?diC&Yrw1M$*{T0RKf2~4 zBO``>)*xsv@sxjn{&&$VS<2;HL%zR<$gB+eS<@Q#kB=FFGm=RR_z%>%c-`fz;(abJ zs0384t{m%DXpOtjiM?Jsk^zdb<~5bnfQD!y&5p57)~$vKhpGT;;GQ6uFS<@EK6n6k zsth3f*|lK-!v(-aCpkbQO<>9-@fWUJT5kFW$&ha1GKtr#*CIr&O|HhD#kaNTAafV7)19ns37WIfq+(O>mY3o1aN1D|8j(+)41&|ct13IO` zJW>7Yr_i^+*GY^-t^s@9i&g^u&wZnY6&>*HG`qI@E0&Y&?0C1#yd(cuSF*&TS1}LP z2Ai8h(!u?~vT#w5&o${!MdbZ9Va>sqLi^h>7Xb>|0hl;9>rZ>)1Q!<<2?YbkvupbY2j6CAUsUR_^jo~l%g6E|eR5f- zM7*@wNoc8Byxa(;d=a&cGyBS5P4QwhF2-QA_kSR-E}qk;s^y~^n;zi~Bc4IaE3;C$ zx)CT!xo>__l_5oP5GFoA6ThNDp5(PH;SgQ~ zGw4J8!?Wr5-*U1bGJs1t`J4CN1QF#t3%4CiN;26Ag~ zMLg8hzR7Ro9n?r(<-mE)AOal$3P9=D+uco<@#y^cF{i3(C*IIZ73$-6n;1q4_BoB6sePlOk)bn1V-S z#B~aGcnAXqA#vQttc3awwA`1vnVnBOi=?a1roNCz1Bp-?_K&J&_Uk z%9Z!j;*hsGsyj4lS8_*L7J+RjX!Lkl;!#L*>u!8{Kij3X_9|tR?v#l-sE5BfSRKNH0qPe;G zg1VPIFA%-*4}YKC3YwUh0B3@-l=XdX6ocMHFkGaFokchyakn%PW z(4G90-840ogVh&v8}xJOTvi4f>FF~Lnb4(blE?9Aj@A2J+_)GF{Wr)v6jW3>*yksP zO6s9xz)-{>w0O8B#PWQ@*6^*)qLe?E82rO{I=#F5fv0D#8#{Xj_^TFp;54o1KzRk$ zpu+kS9nP6oBeC&uXDqYSxie?Nyh$k)L@7*tj|j4lfr-f&{X#JTtQ|5{defYtL_<$0 z^l82)5r-1VXTFQNSz2EH`V+|9VlmKenhs6%f`Ps@`V`RqU@^%OnSqs9St*!F8+IT6 zN5wD0YW}O7W1a)gX6R3MtLMRn5D)1uyBsudTc{KPoW@cuVmX>t$277vh z&!6ub8#8Wpg04*rVhu3U@u+|K5}1wHdyh2*9lu5*L2WB-alYKOv7t@9D6kWo?5|y# zpRZOAWTqTAOg&iA0kJtl#6tTF0t~+s0Eu^yFm&_8!Ovsf;Gf}( z?EuDmB5-5FO2Q!OaRNl1Gm|HHDEwbXc5T z>Fh58QUY^+0w4ogDFl>+B&}Pn(c`|ZAV>Dd@7)D8EIN!b^$#4{X^maG`HJ{BJ5X1> zR?MB$V9_$KcC&_fhk((xdAs;;juVkT94EN~za7r+G$dkPGee}+2) zEdIltb<10~ZV8ybx!nqO|2)^0NE+-oPza?zpH1e!AXk#REL(>D&~s*b3c~{^U}w8M zDE9rK!>b#!kft{S4yM2)cMdK{8 zUfOR6yOdj#%`jScqEh4bbwzNfUhg18kF9<#tM8V?~)kVrbVuBQzx!3W*4A zRh6#-n*wynNlA>3TzpwT^HF~N7n}7@gIQ|$xVY2UCh#?>Vz$=-qCd7$5C-&_x6?}2 zs~CQQ>M2FJa;UjSpo=!i8^C2%H8s$-?S|2tY2(DcP##f)Ls!>{3gYv}pIwdTT0OAN zBnAk5(MZ=ot47B26l@$Ufsp_kK^^(5!LScwc2!>#)}OXF2kjbJgj#8{-@aHD=!%$a zQkCEN`F<^N>J0$XGxU4NdYb#vilWmbO$3OZrP=K?rLQfFR1Pu3D%VK1c;DG00iCIs z%!hzFs^Gh$J&&gpSSI)v(@C2NNGROZ*F*5H(1lmfn|WpC6qFyb_MwGe9Y;nI*Kp;F z0j}XNgFiwaI(s#)Ra15b!LHiN2}zPJl*r5nHif>vzK@R&jM71i&&|b^qmgte3_=02 zuraI?L_3H#RW_fKfyaq|@d8{6pw7hNoa@%=_qs<$5|#$?_utqUPXdVqu4~w6g7Ip9 zLJ@KBeJ1}*=mh^W9V4UnYzrB1pPZM&wTYp}G`n%*N&$SRehi@bt5@5A@)6tX<1WT3 zLG1miBjn-<$KU1qTp14#4sRyKrC;rvi36Ab4(i8L1273uGR1!q38x13+dh7Do~Vgu z`UY-Q=%*)Yy~~O{4fXY*y}BCXqo{~zjRf8}hEePf1!k_Gc50?M0%EZm=6+rXK!LIU z2Hd)yhGqS=r*0~`y3bQm#sS*{9q+%?f2N2f&Zl`_86)-~g6K4npG6qa2Nc_MS1|S0 z8V^CR)?B=p;`3YOZN^CEq^yvL!t^%Gf`D!RQ8|F2%bE|tW(TX{I2PZ@?&uF759l+% zP+$kP^8h%8pTU;h%J~?Fl&EJSz{22n}!RRxm}6&45ol> zB+HSH--VZ&_fKYs;;g*$^c^;}3JN1}T=d1VL-dG{!d+?y{897N^11}fzW+A3xHOjUJbs|Q>ce( z3L>AVq~wDK4-TOrLsGGLK>`Ds!MI*9KEoWiHLtzZN<0o6S3mt6L>i!?j-Qd35T1CX zF90Un)sd1Z-V9&2`ObKlmI4~5K88i)DGE&&Oom@t?su+;rWX(br%p+@&9wu3q^C~+ z)N(yhBq%9^0p^HKo^;2~sXFv|{`{&s6L@^2@Fp!A+vL9&Tm#KL%@J6G1h+;YR+kn) zWWKJdYRPjUzt&s;`uD8=-e%utWjmosCd&Q-IL?g({77NfU+{0kY0-WU=*6|E1}*!0 z(5MImSe;-XOoVW3ROg}q+L)RL3Q2s!ygFot#Ds)40hV*z=J7o`+Bh7ssHw`v-hsYK zeP`;J$nLfj5^@W6=Fu_Yq<{Frm%Otz=ouKUU9f<0Jtk)`QN5jnWDhBW|Etau&}?4F zvxOqoWmqhH2+X5I7#TQO0y#lE{CDSy*mJ)RVF?L_zP`1yBj@u*Z6IPmBQL4@%@ogj z=FHLoIiJ^~h8YAAiEOwlf`bYC9xzp_n^z9I1FryIh1Sg9zrM24vuJ$t=fobkO+l^K zsD?V0?9wb^<%{3;`#jxN7NM>j*MZFQWoZ0<;V<9Hr-pziI!sc-y>H<>JtG!M|nz8THoopIVcpTk7lBQJUc~C zfAQi)$G4g=VqtefKJE>K$%&{rXk}#rsC{4exCWxQNuPf4cTtr~y@_ZB^YQe|jHQD^ za$H>VzM}1e2k)w@@#LJuC^TCi`}*FX(Sf6fP(GjyX>Gm29vfo5NekKSnEKodVt@(} znuxmVyuk8uAw68APK9Cp!bz^`<}J0f#KfGYb7n(g;O)5k+Gv z*nUm9vXzQSiOmEHrwK8^z7|I1lAJWWG@&YS)ad7laX~aq1d1_FQ52DMO$4_I8ip<& z8*C{B48aFSIJt#FN{2K)G>#>v2qDJ{J=%f37Szl5HT5dte4{w$)W9SW!w9%|pBWpp zGb99wm=toPMHOX0(NhYit6!8X~PEC$r zLf{lr9q?)F658K~;CYbDH4+46xG%>giF7?!6jBO?!MNleK`utb;4?6?01^h!=;KqP zFR-<-p{}7({_b7ziX>EYz+~Mp6AhF>PIJxu>sRngO4<^f|$Pizt|1)>|hwClbu3q!n zwRRw5VX|WrOD%nAW;)=FBIRi!)}~6P`@}>f2o}6+?(gkx5?^`1(hJDp!-o&E9muz0 zq45f$TDZnxqBlixenryFkz9b z65tLV)8ZZ8dH9(yLY%__;_Y^XFIpkl?@o|(6Tz!M3`8^oVvZ=S;1fL~*=8zXf1sVE z09W7#Mfqvr%upiV4RmH>DM28olKeEke>jnEMPN`9{5%6cbEk)f_~a6VB6L2$Z{ zPqWs+JGMnIftExFqHy0v;TkgJH6m^NDKdchoo$AXWm=qIkP?;&feTb9KS3oTg4oIx zrDNb^L@@CEPctwxl7MBR9zXv2-cvXp#UyU`r%t--MSY~(;M&`$Max007?Y$(1xZpH zKnk!xP=Xic=izZC$4E$YsX)~Z2_N6t-30;$o+AO{2pafVj+Q8R0tWslS(eg`w&Y5V zkfYF&5Pbg|Y2%HZBMS?`3(l*6(EsIblif!%JJerfG?zIx7GZ=yF#RuSV3Y?2n(LvP zOj72`EO5UWaKT&pOS;TK2JvHR1QLH1KcV(GPuALlTOQDRVSjL1+60hT%Qch$KR?4T zmy?r|hqlv&u%MtN%CD#T5rddv4U16t+~xo3dtI*i46@FXc2~n2tCyGp$Lvp#KZcBr ze(>+6RuUkX^mCmcslj+hEf`1uVU_*Pz|XY+%Wz@K&&#lGguv@l+nGYP%Fe3fo$dY` z`@#f@LU5jck_Z@_tqSSC9IZG>z*o>>$b#b$Oepc&Yu~;#3uK1htStf$S{A|g8tLy8a{p+9vPF91H(TLz!|h1pWt^XGlnL^ z|2fU3mLR7h<8>~CA`x8v|1y8Z{EzuF2`4=R1Id8HeHeCtxoBt{&R>KFihuzvyL&m- z%3@JkjmRyM7=fmQRXPoNCUD{4J~$E8GBu@e?%efeCn|RNeV_uXMX^GFm?9{dbKZ}I zDD^orDUPEMH4xM=bQ^!RSwKr&U9TYqkfVo3#kqV8iM$t&0c8}S*GGatTPXc4u`8T* zy~3{v5YuwtNIl1S91#eG9$|;-M<5~r0d!}Q!Q9~F|AUFmkWa|JrERC;VP0{X9{&7y z266P78hq~hBg^>^*cE>Y;!uu^L8jWmVxwbYUHV@~)fd1#Wg~ZZ{QorSabQ3JQ#pnd zf`2zXxmalcQ!&1GjQ-1%MhQHj39%k$`ERv`0*^c&h?fT5lOYmoy(ftR+RHg}H zyyCUFsB`b$8!)lX{y99QBdvXIj!3_-c<1JM|KU#3Qw10O4IXDy5FU!b8~+I|zb!4P z7C;h!Ed_MMuxA|{d8f7XKgJ-U|II1g_tz`_W~5=jGX~!7)i#X<+;`*c<9b0@-$rwi z1pH(Y+Mpd-@oja`{O?Zb|N7>JFqzi*cmQ(?Ckti^pa|gEGD5fB4SHrD7C=#l5Q{5$ zurj;~PF|lv6t|$DBRFXan}8v;Yz|+_mXC%xE80`1;xjU|K7n+x3lfc{<`xd1#J+bx zKtRn|LBWE&P*jEJY+He%b?eT$fOathyBVhzWV{hDsS(Zh- zHrkif2ITn?Yg~FKxKD&kYxK>xz@vN%Mo&P0Sz@itIUzsT8OdG}oJ{p((wta8FlXB^ zsMGk@>KVkPAYb$tq(kFMXZ(|9YF-ZkZ=icR;b=X}KOVFz@S$y$#ra&ls`5t71)jtL zy1j9^U1poWd4JqIy^kc!ynh9K$k>`G9womMDAVGThuWI^bDmMq#V%fbM_Z8FzxKF; zjI>X{DNJD^{_`DMD-#f;5Rnd>T%gxC_}<{U1AI3e6r&E032}xA;I(K>*_}2jABE%# z_jBq6vF6a8T>DKST7Wl-0a4)r7GzIP&$>Zdcq)Lq>e{q}n!A=YD}J$Wo|lKu^Bdm8 z!uNvpb;lI>o}wI-go~yEj{HELhZF=1%x-aazuqPsyC&|qPXttdVfkt)2G7_hl z27;Bi%Va9u$5{4^r%%yq%ljjEdfaO!?0fn{XSf@nzQN;~x&Z^h%vfuLr_)D}P~72h zMErU=i|@Gt%HF-3tZ+0mGJ<)~TYynPj8kQO5B<^Nd$ZsmLnXQOanZ)oJbe!5k|z+d}3OrIwdLlLJQ&-g?oXcJK)viE{M`OGkki z$6x?ZZ+Bk0e63PSfZIRp7!!J|B-m$qyZ|9gfI-d?c>sM|K;dCvT;XsfEgl{SW;zwW zOTSFnI}c!T3sz+mOd}EO{+Ko6!Ntlc|Ey0SpTZUVd6GE%N!pR(6eWDT;am>DP6md2 zl_QPSXIkrhStpAgO^6X ze*Kyl^%c!#M!eNrX-Hs3sIAbB%!eVjG7!r6_`uJ6r5yjjBAB3r$&{;8TcWR|T41Oq zRqi3)@d6Xvn{=Zk<_h^m^($La3wk5_-nl%`|AHxK3BuYuM`IPQ?2_{_|l zSYjyJP?&)F_*@7R=<`rV^`czB{A-943J&G}2R}4&6q+~4i;Ih&JBOFlY8tj-Z^=kY zgSjOWT7|7iRS%CX(3pL;mQT^r-iNjZxIgguM;(W!4s3u42PA`jNnp*B!CL%1BS0Q- zfAa4#@WQ}gJ>Eum`niuG2|^i#Lr@S(q{4GT#(?HE!+wBgVR-It+5;8}|6v6|2=(A3 zm7O=pGe3X+EZ4eo2Uu-am=iG0!IsqTIzZxg&*Nc-$9Y(QkK4W4eW^cu5Q%7kHEJm- zEnNpL3v{=igLwn+jD`2mQGYwG{r<3tTv*%+1&tU?acn{MwyOpQPr%XPV2zaH@MDAT zpHoKg&Qutx7=`=)8?p;9DGQMDPpUbLje~5WAq9gaZlKgN>k1;M&%^jl1D+fGJ?02{ z1pZ=bz3=<_%IY~=TD}2V3!X3208S`)(28Xd+raqb8PjSEJN z`^Lxj!1C*U6|__G_y%}h9LSwf*lU+{b^8HPCoO!PoJ=>EH#BYVuY1!4+=3ZoY@d_m zJ)mCz-v*Nk{u`gA*!?zXCR~8=1y>D>O)c?&Nt{8{svSl(3=54u3@ITILI0iZAaNP0zeLkP>=ly!W(a)Q% zwnRa@*FTGxtm#7euWiw5YN0@oSmeZCJ<0OUz5X1T454EkZ&ga=OK{_7X3nB`%r|8X zo|b8~hl^J2mvh3i`s4pzfmYk|0s(D|Ho?AtwuuII<(?x@Kk3am(_Tw7zvt(BbABQ= zQkOjlexvy%iA0giKUnD;DKsm$VLQ$DS^&-O+GF%a$fHMV)#QqycTVuK0bU@;50CHB z6aijYZ{`Z)GBAI}CKHY*+yXLqylCMxod=#YGcAp((O}c9KJJQOJ_e`CAI!hbka%vH z%_Yz;4VDUkucZs;9zv4-CbG~Bf64cKkX*0W<`7GV8rKa6$#qsd)j^g0GWGUKE^Ws4o}8^hV>$uxny?NSG7t zvlxzyy6g&d7DC*_E0qtN2@0$?HXTDPR)wB{GxGuT;bx76FPTZ!AR8D(1vl*FT$wr@^pXCB8d?lC=VKD%i2F%2e zCR&9w&t15I1@J`P7Qn3&Z1yC`cjPU6ZJnJx*2VQ`V0elt_yP(9j7SVxP*BjG5I z^zn9j9r!D03$+v_#0oBiQP>VPf=+k95#M?Lm0qf54Sz>#unf~LKyUV{%WGtc_>rJT z3PBet4?ff3=Uh;j$omo-0&u&r}aDlYzUQof=~vmr*PXm zM79y(w}Sb!8ZrK7Xy+>Sq*=L`U&zfcT=f$n=LLwyz={Dn7ydG#))GO zSs7VwzhHQQ6T)p}xP2VF(Uf}L?e(<)=0Xr@r;ppK&H_2H+1q|;UDoI~9;PBn3%mgJ zzhx8iGRsFRk=#V*ke0HOD=#$gPr_V{Y_~7-@Ay)dLs5;0JLS_@`VQ1{i4*yLRy=5o zN=36xK!na!HLnjKGQiUv=|z&_QJywp_<>Lh>21akFr~xu4gw$#JxdcxNuH`kNqNy- zU?PQ7z``j00b>agU(3v$Ng#cqhnKM@j|d*fKNr%WI^_!;o|C@AxJJ0_UGlX06fB)*uPuu#8IXa0$i4T#I64>#oxZDf>0GvLv AY5)KL literal 0 HcmV?d00001 diff --git a/data/scan_perf_zoomed_out.png b/data/scan_perf_zoomed_out.png new file mode 100644 index 0000000000000000000000000000000000000000..5214b171af3169397faa37053ab608cc0618491c GIT binary patch literal 18702 zcmc({by!tvyDmQH1}OYq}oEDv(w5 zx3uJOZ(WyeWZzN>ekxzvjH9M-OGlcD_A(ZZ0(++L8qKvUsvP~~kyHxTRtTODB+^~V zA+oW3g@r{$)qFC#vdvt1&)sAR z*)Lo$tZ-;oeO=)^#zIfeZQA$o-e7e+*MrTU+EPPT-W+Yj~`$jZp16g@|AlaVqCKTpUqG^zD?F!wzrgq%5J)F0kO zGrAnZrMWs%GqrC8?o^^{(?lnL z4kZf#rlOJ&la-Y8YoR}T= z=8UbatwVcJ!->jm8Mo!1YYnHP)vimF>^DCcSCyF5xOJ8W1_u7Bbk5FhZf+)Lk=~z8 zaz0B(=f5}NzKssVA$SoHVVhA>(%0E}SwJAN;?d@`a$ZAlaBxkHRF~V*5A|WUoQ-or zR-^+@FA4ge_zY*gdi5$YQq#vrnmGrLtbWYn_@;w^f<;YW;^g>`uBO{WbTpN{{gSNjA>4P3$se+(M_Yc! zhqpey8`xVNQ5|_eO-;RP5E^pJJbn58d(_DEksKYXKZ>9tFHgni-eHie-_f0fcVubO zm=z~ymh8Fj=O>$;Ts>H7)n#tYD&r+T*TbdxwrA+*eVl2A&N>0BPgFvJvLmfbQcB9o z#Kb)(Cl)3qwd}SeF=y?N%c_~zg7C`%a&s5V{Ero^tqt8rs$J1x4!=JY`~KNQd*|yr z=AO?k6&11Vyj-58%PVH@;@1~ zT}g`S_Ug!(n%{jUieaz)CkskFdqqEeyd({8b zcewctpHY@Zii*FNSA>xD_s+JqG|{&g2x|oEgbnu#Do3L~@u=uOzWDj`XWK-*)GPP# zTRH94hVU-3+zbsaiF}Ri8nbn3%*5<}7Kc9DJ zD@Rp@YrEd}&(%6R@DIN*qf%E(7Jty2p%5AxI_W65^3tti^~qGfrri5? z*M?Nt4qb2yPKK;gL+5b*b9jDr3Nt5|ku!X|Nv)Hy-ixorvWaCwn6i_DqcYc(BIU0A z`KC+-q#QZJRg2}H#poC-Iw_C!yI)^2C%g$QzVS4{l=DKpK^C3IpIyhSnfpDS2?;JP zE@`Ih7*kkdAS=Gbf3nVf|hK62?=-l3SimFW2w~UR|WIVi9 zx)MdGpYwm$xvQe2bbM!%C--L^*$1X4WkEiGSoP zljQ57GwCtor^jQbDH4yKd8tlJOgtve+LB69_W!d|DR+7lBO{?AZ>=V2W^HX97aMzY zyf=n!B$VH)XpOs9xN)_!voqe(f{>b0Af5bzYj6D4)q$|xfn1$z2(NH~8?@iMx(ae~ z()6D4+)F%b%OdNeG!Y~jm;BI9l$e+}$Me4PL)G6e_ro)?dYhY{j0gywKeZES4ZVr}NN@V>@fCjX<&SOJOo~!9E(my=ifoG-e~ajQ28#7mX$;kYY3T$w`#H8NsM<>|s@X z@AEUr0CcN$LuW|tZBx_HTF)(UUhPstJ-sh)vy95^IPME{x+MzRHuG4ka7%vVpsM_Q z;KadTR-W1U*PTAqLJ!n>KH4yty)D!wXvs;e{AYLN9r@!Re6pOTEdCQ{`5aCm47zv3 ztOmLWs3_R8*Mq<2nYvCz?fUaSm`F=Y>*!GNa&-vMnzg~&(;ItNiBh9=fTcv(uvc1sdyiJtOod$+@kF$@piEJ!#h#78Xh>Kaa9SJ@<6&R5oMM zxiIR}G5XR$g@t3tANznp!6Z4K4trsv9nKz!SBEI*a*8jRa{oBk+Vb$B9I0G~j4B*Fw`SEt&w1{zpBH)GX>zY3>ihCy z-u7vkZT~aL6uIN=nEf+Yct3}SwFlRK6c{CMCMPpp(jVaZ5i5-epAM!lDC*+k;vY-V ze4-*Emd?3qsj?}D8)zwkOP4Y(o?q$5>TC^yh8}--I5`6+m*%tnS1k#mi{84^Ztz#u zBo~wDhPaPzp8dPRIE4FHjeGadP=bW* z%X4B*gM}vOl+B$T%d;M*$9rNX=$!t3+3$*R^sU`yG9Ku*Bm8&PXpP!NG z>}&a)9Lz1xiIXuyZd$b*Uz6lrRch>)tc~dl1HVfQ*g&K==v(1%{0W>c6WD= z|LkQme);m{Y8-0%{9JK#wiD^WX?JVEo#H2pXJ~0>=I7fyRWO%H{1-e^Qd(bMPxJc; zy(Z^gnvPJ)0w7z>k#cB`{iU7ey}i9qHQ()SLODl|)cPD;Ex%n6N;( zSP=c1SD%xMOEXcJM_T%KdpnPqnET3bIok4c{WHbw+XI~HiQJyC7Dq(5#LUEtCw=U$ z^W?Dckj3pK4V{(kyRXvw&&EaHydU8B7Gega#5xc+ql!&^dq{N0{BR#yN z-T#B>9kDmbr*j`t{SIxnX1gL7MPMo4F*Ux?xoQKDBkyDbp$YIo7&rS=1xy>aGoyW`Fqb!s^D28@VEu&!&3Anwpx> zpHbD=bbvdQqa(T&-a%ksX znf>5PuQz>fO?aDm$|@=f3kqgv`tskoRhs@vuL#tMPD$zFp&Wa)si^!V6}6sm;}+eP zOut3`^Ria%aB1JeSVQw!@Ar$*GGWc5qiI9k-xILJ7_g5$@~7^l8QVlXwy*~{~= z++!kM8c9p|X-;hfNMklYOz7MOhKBqm)!Ptz<290}>6jyk{dI(&#rIE7{y5j$pHC1S zBtwn-Umqe>xwfuB8JTPi@9*nl6t)q3Jl_M8h@p|uqc;!S+@N_{S_5)*o#~+M(xXNY zl>UOLY)(M|^A&S>7{9vT$`~0KFq}Cf=zF*g;q7vkmyZvkw63T~K*-+CE^i|dM&tQQ zBi{(1lb5EXm>3%~$@DqM%ePFo$K>bbLHBN<}cxLmL1=;)}R5L{JN^~Ih5Ms1460<0gOUZ3bn61#71KHHf9|6VxJg;SAP zg*AlG6JA+dWIT6nX?(mwDLwAGx`? zuV250TShNphu&Ell(>Q!=`Z#T_}sO%^@Qw#fkj?X5t{Q7v@Ei7vi^LKHC^AoTUooW z{$5yKPEJTzn3!lf%gMoEThffA-LBu4iAsnc=6pMrIM3wl>|CFZB&OvbG`6+1DYw9f z!axQe%C7h_w&EiIV41UL&xX;d9MNU>1o-q20T}FnD8w`hz z!(|NOE^O+5_V=Y>Wkp0@n_u(r@BoN+=gu8vWg%_*_ZF%F-r_(XbH*;+>yArK^QYV@&thUSnaLzVLRh%0E6Dc zkxdw1i~CnsR~Z->7(dV;^P~;k!$Q=5aQtQ~IT1q$J-9DO4lQI9g1DS#g?|ycfB%L4 z?Cxh0gbfG&PS5T0i4uhmiGm>G*JUv;=Z9Ye+30-`HZIH~dCa48|K$+Gq_Rj9E9MAH z@W|u*e>nmnyiZF267=pJ!F>e90*_!FgMA>0#M~y0n_9+Z+<`#O}`Q1^!roq@NnXq3Kp`CB}SKa zvMt1l{_*1_{gsSIbKkAI-xhrMKuSm$53v|Z&9z@h2g~Tx)9tgq)h+jHXsETdwU@o? zro6n`Tj{F2JOENJ*7C3dvhJ;81z^LWAYUgSm6kuedR|<7CGs{lGKzWX2%h@)1Zj46 zMG#@~APUs^!(@uEu&@swKG>K8ivVb0S~@8<_TmFR+ZiNniF9scg-_H`yoga(HFYZx z`oYStN>*u)GtA7#hf77-+1X+a`$s!o-rj>I!J|r4->^`Mp%k)$Y3`MGLX^sj7;Qhh z%s-M9yn6K^)q@8QY$0gv>=pq_9u(3b>z@PgS&^J4ezQTbv9SkRb7nPe1-AWJ*tfLq zvSK0IO!!<+F3ns2&k;PfgN^iDCe^@ISg@A;SXQP6!An8m{^$|)^U3Cosjo(5s4RR0 zi3<0d#p#3bdwSZU-X06_X%=J0;#PCU1RY5bHP#}Ar$a^Fb-9p32%!=1ws~A$3mYEI zb3u@3;bR_2U>+fuXX_isakQ8*f#dtMi6D&$696|LYSJ!Z9$8}^(PN(dJ1~1lAT};x z;^z=Qh(b`DGByMrdyxcDAT-b55rP>1ONjkxA>cA!$IAKZ!)f8dw1_YV)r4cDk^aj; zAM9IQ)WX3$#Y*oJ&=i_>2@aALf{9q=zYq65=3u;X%#VbKNLg8#k~kQ04;oHIdHKxc zDJW4%N#DNjE)Fbpyyi_T;Dk6U>}&TKpPIVw;NWw(ZOhL7&Y;vvJ>e<`A74bNnyu}^ z;m(q;uWy2Zj*d=*J~blTfg)fXPkM!tf$}TucNE`z&B4JTl10Ww6mB)lv9z?bFkjP} zL11+@H9_!`OW&qOJc61CkdFR3pSzZpuM-m5zbD4W(=#v(pwZ6-bc~EtLz{m8Hl8Yn z)$1XE$}e8wG_p7|(;A9xX=%Cr>HgQ33_?~tW%>E`4h|oJf^allF+Ji+bqYmBM#lGW zmyT3f-^IzvJ}5$HG`gds1GuOcj3SG0@|>KUwP$kFrZkA6Z#tg@h0o#bXkow66mT#iiy*?JT}#?p(Z(>ApuyIvX5> zKW`^XNK7p0u|A$4WL@pHa@ZW4fZV!shud_1WyQo@S+YRsG6BN3*J>Tu`Sf00u$XKr zx|2jKc^OYKfr5&PYIkicH6dZB>XCIQ2@MSmlf=V|f`UMz`kRhJ-^D#uC4%T!zYgor z@$Fk2pV3)HMmsAjx6>2fte)ZF>Raz_%F4S=3Vzt%J` zQ0_cdfAoV1)2HW6WyRfBjc?t$^_0*kJ~>(1XWzx`$KS=9hTUs-+1ANvb!MjAX+%=? z5($zB0fk)3_CC1#^W(kp@^Y$F$;b0&q*hi}-@ba4o~wctpeOj-`DU{i*ONTxFPIp1 z1wsgZFg3Wx5nmF*unX`CN|XM_IL?s+dkzC6&VMTG^MaUs!eFHyf@f0jdKnD9dCUMA z@5T1d@;`qG^D0)%t302Hz^mB*dKGa!1CcQHzjN!4sqne5|7s!gqMsR)pF7{|i*0Vc ze(qcnP+y;_t2a&P{4j@}=Ooq8619%iQSWC zz)#+Lt91}hR#H&a0T;*uEO@6F`Q*tHjknT6Ik(uw#3});^o!bJv9hKD9hsSx^)L%^ zhmn~%B_@WQh8QRY7FycY<&?98WMndMw+svt-o2a3Pzbl`e#=*&chxkw19Oh#LLZt4 zsyl6dULspoh^IQgk_!ZX4|{pf#>NIPl}0cTA02D~yQUZn1nA;egIxI-Bqj&}BqSui z%I)cDCSe4TJ(!O1uN0tRk$5;=w>SFER!**AcjcFy$9m(>pW=doQ5h&by*FibbaZro z4iB~8t*)(^85z|$kI4>G7O1Mr;=MdN^66lJc@FRpaGFr$M{;hxyZA%pOTLx_+&qLi z(D|k|cUORSnBY#u)*HXOQOx&t-a&oGxhnt%-9!z}^mh#?R zt{nCH*3$B{;v;Z+Wo2cUJg=&(bO%gW8ur3a*@yO$oF1l3-sOhmPaw)l@jKk+DgV&k z-i~e@gXwap)LQqtuh+(HNg&U&HB&P)FLn@&`R=9U=jRs`tW-Uko9CH*hRrJ{XEy9{ zlZ%UMxD}{?H~c1;VgR-7aDQJ*VkMTP#%;yWotB|IdjArQ^o2gmEqe)JrA7b9*VNe5 zltpBKl9CdGtU3w}N;zd^-Y}T zu?WT(J$?O6~tQNRZC(8 zQFT4Y`ZCt?A#_A$A&|vOS?WofTU)BCsxT0=#Ca~fkAv!?Tp+R|T2N4so2eMd;^gFn zCK&;SVChFeCE#l7ZWg;%pnADCwn(6pIX4{d0y%-v3<{{q6(N{0L*~n+mQd=?&CQ18 zc0l)Wg|VHSoB-MYs1kRFx64@KZq60fo~5CBUn%C;+FI$^RYK-Tu~Cnpmz3UX^6K1K zx5S@EL|8iEfvN)x^_vrVXkE9L{IC!-UJ&&_5aT$oTL34L;^Tq!V-j}>xE=ttR;W3k z)%f~dxfvyHhMutR-@l`yAZWiCRXV+QVRIBHJBI4hfrVz%-a=e?H|$4pIqgF%Jan@dkmZ`qeQaNeHZ#w=Ny5JB+YS5&-7+1=i@u$H5|ms;(A z>NhgOF5gml)8sUWfa1qUcuTiH)WHSUI59CX%`+3R-*k94^wS^OwG%*Hc=wf8zYr!@ z<=W)QW?(VlZUL*H#(W6TXv*we6mFgp7e{wYCR{27g&6=!4{7;y5L_6)S-tUaDLFYg zO)lg&x_GJjA1yg1I{G5NKmij{)`1q?z~6D&Ih!P!Lr3m&OI7v^1pyGj=1$!gIXU~L zAcf*1gXM4D%)39x1ZxCUP(xWcS8B59*RPt@&F#XNJLHJrWOz4r0?}u${F6QFbDlmc z&pDsns7p1w0|&}Z^-D}jI!8@i3V{PLc*}Tye=2gIT1QVys|S>}?qmt}jeIWXGterY z+{_VUbr`AMfoTT2cVA5<1qv9@MPXK(SSt^JiAYFD0DAsM<0^u}!2qlK=p+giRn?lu z3lCx39UdNr+o7ZE=~-pC*Opdy*{I^OurL}hIrEmSqazeRcCuw=z#C&PaMPR?y9l7- z`|qz$0Mq}BoLQw5!|ld!s-vqFzd$(w1TN01sH~x(*B^NoGbS_a%mx7hgRWChU400% z+csbdnS0H+<*To*IQ$gc@x4fcpx&)_DK!$nOKExawBqVOm&9ZPAc{GBT-+SjNF=QY zbLKJVlRqR7HYdQBCtO3=Z@$2gpP-kb>vWKw`xwNE1!o9ih(W!Nr>kqZ$N9n|93)^D zs?hr09&nVHItaJNW)ey%h>_OclOMRczW$_g?_PRDM8pfc?EtT>ro7wvp#ScGz5tyx z$5jz>KP)sNVg;Ss5AhEq-B&eX$6;X@HxGj4LND|2SqiXymRhTwnw~DZ+l0-L_kjq` zVs>_xno})mBL-^6rr_@;0{Y|}KNx)=zxa$QEVytpQe~ryiCw)g?*@j8hn2gckTbV( z#D^oz{73)w^S?G}1qF=z29xAt8@NL-)ap{5J0}H= z+7K%C7KpY`$v}9GkBp>nl!FV-Qed8HvXn1&ONlCOGfkfncSknTdN?7{yCqYBlFXUiBW%myb0NI^DGy`u6A}*Rm zM@l`g@!Pk2`@uqBs_${-99Rz~O1V3M@+orquT9wm(6NPc1xT9PH02vKQh<^xTqc@f z?gF-00J_K%1q+MJ7k@}#wd?Ea(K&zq{Lu-o0vri_wRbnSJ?>5dfKH$^^P#WCUVXtE|t@j8-}Q zt9nJI>go<|ZWYT{aLbNB`sKNy#bwHNXnA?@6N&s3^~}&^1p>PYTl40pL?E}_zJ1%u z%4+gg_IKZr1t=?vi*_U=;XvE;FE0<3j(IKd+1-49`&ll&Q11tW(ku7Bz+Xc~mPwY_ zBD}V)oz%Rx6yWTg6#qy{vb-Al# zuRr0<36SZVWnI9C#0o*rssM= z`QCRdEG$&Kk2WS-7Z(@T*Ij0tG5Ag;FMZT!{g-nCPqHyMStcg2xFKIK{rLR(^T2x9 zL#YJC3N)x2>bc9BOiP)(Z^Ff$8iAuy%BOJT*CK>VNDSEcPo?IR*+im*;;3_m}I^tj`wjeGhi8^+4cMI_G73AjLDmlKb zidoXo7cVkHMIDE0T^JC=)@1{Ah2DpaQe*$F1AN{RG)clpcKYl zWYPvvl;P|&fQoacGI^msT=R~RQE6p!S63Ice2YG%8EV+7)j7-C%d4bxEmW|dO0EGl z8;%~yYUzm5l>cckD;8;izQzLW7%VHM6%GbKtk%`m-h9b41keWJJ~}!Y1OgrjiG&jb zVA~Oh5CfPKIQPi|X=DfZaR2@km@n{faa+EAb^iG=JuNNZpz%G7%dNp+5-EnEWbvl} zGS6|N)*}t~w=wcsYieJfiz}P#g0KldxsxH7#D-v<=|1I)i|KfVW`?3?0EwpJFUU zn}KDP(CEQ}u(q`Hturw(@e&-YhyHV&2V!@_L=TnU|OpB&?o+U8dSSy&uvUQXhu) z3cCTh$` z_B;f7PG(My4WJ8X%*Capze+4g)OA1+Cd24dFfUy?TFftkVL@9*M@B|wAs1XY-@kqh zIS6GRR(kySv4#fuPT=1&r$YWCxAPm7N$(3(XUb5(k-$C7^s@$KYo-5XU zbQ>K(&i_wxUfo|v(uK-jjS4BlY z+A*2~ZQb1%6;BrvG4|RIa z*ZUn;?~h`@Zy_&$U*Y?9Ngg-+&UuBa^ZGE69v~f&hV%MnzOOmKpK~~%Z+4}D-NwN| z8s^7<03^_cDXE1vJS;3PKOcs?_*C*V(-R}-y4#hmG5f!|`hNJry@s|4LQOS*hG!;stX)a6ciK{Q7`>QXcyEuikT3!%F$$CdIJG)SFxvBE>8S8 zYxIjcn`srH2{vE_M4^i<7zcqxX*2C@Z@1l99DsXl1ir4|(7(svwkjRlrBO_G{c9F} z4GgSd@L7-DWro{*9UXbAs(1VF|4`{(uRFL4xB|wW><<@B+kV&fUYsQa8N|+`0gyy? zgcu!eN#W5}>(8He%&W>+X+GrU-ph`=Y;YAqqCZkBSO$`7t{0%h#`^-6gCPx){(5MH13useI71e|?bi?0;q8%q2*WvatPD zoExkO&r==IoXf6wR=a@SI_3Y{z6xO{@H?x_HR0PY{UX!y;$lj(6DTz3ujCXI*8HRv zmX^8O932^>K6jm*WC6=(z0ynnv$xhi1!eg>4RL}kurkARQmEKnB}E}Fu8<6a{}5vs zg4BV?TNAPEdpf*7IcW|PwC9^_&MK%bW55eU5?Q z{rmT~b#*Zgu~L%IRI@0ID7pf}a&8y+7;l5d&u{z*Wy{XbUx+cZ^;ue3Y4cV91rH-> zUtb@vU*)DSJs)q8fNTtQKD1wHaL3+Mt6XTKS`qYppo{L4@FYS!0f%4-98Tu%@xO^g z4tHW?F(90v<1^Q}8DU@_7mHG*++P}+N$7*H%v)~$Td!DVBwa-#m!&;KqG~*#MX+?X4+b zH|NGDvG7>~^lmxJ+6?3ke0nf-h8#g%@ZpNygwBbUuRCDluE$K##NaE5_=wgG2;|s4yPEH250Ens=Pe?^qL4hNICtWuXc)A>( z{_>Lf+YwpXK;;T0U!^6YNvIS)+8}DMSqEJg; zR!LJ6aQlweSO*hchG2E@bLxOTuy;usz3Yn{{1dtUpNzh%HXm|*ZK*AEnrFLQ>woN zUYkMfxp;9JfF2P3Fixct-!U)%d7)<+>QN3OOlmN`78e(Hx_M+4`t$(8Jmh*% zD#5+EE_r1iCB$l&`^R4dNLxq(xZdGkzjVI3cEs{PvACaj3Q!SlMjYeR`1m-C8UQ{8 zc1BG8(RmgzornGZ?>t&D0uLJA-HZSPuLqO@59Fe5PxHNd_h@wD1*kHUDOp*^V8jFL z3{I9)AQUc1a1wvsmq&We__3H>!?HJ|Do!C;6vqy{o3K|<3lmj2sgZgqQ*c@PA-gG_{!fcA}vRqomsx?nXbjXZ+kce@KwDTc14Elh2PBjL)onH$MOAuc=6V#Cqu55x5<*6{VzV zz&%GK&+MLc1h=%L((LwaV1w=(K1gnZ09sjDfgyfzV&eRR2H?Ox6c+xojPvXrqA{8w znw*Z}?N&>pV^?u`d5ZXh3$9Dxw8JATMPlCD06*WxO5(3jua0{SS90=H|Y>&`ib{fD?p@OlofO9l{94!=yC< zfHf#62&mVQ-@h-gvjf>H@a{Ov8cHz_!Su{b?;n%}^5sYNH`d3T$+x$+(a!{^Y+N5K8=Ju@Azx7++8X%QQGewG|2sOOvuj+k>HUaZ0t zt-s7VXzq~*iOBlMGWGwGlEKH5W%!tcnAZ_Hvp44n}CjafLW`3Z#82rv?t zJ~*LxeL_PmJ4BcWtALE95Tz;8yRp5_6JQySiAZArCjzjSXUIS49!q^k5;0So$~EyQ z1zI`^=u;kn@O#(|@hDWvU%w0E!}9CmOazc%&Iuz?6E|lwa5T2z^w42MX2PdIbXvWw zI56kcgKmbZo5&}>sI&FgAJoAX5pdWP-$M5pUhWGo*PR+7yx=s5*(p$FaNx%9iT)Sbxe(vr5omz7j^nM{-pm!B zKsjP!3=sy4DPqy|5hquemK^!(DzwitDZQU{2}mMolmgi)igW^XsmoNNu?ue;4+Kc1 z02ek|C*FX2U?zkOAL9G^BaxTbm;z2PW)6Oz#9P>8&oGze2p&7+G|+_&%U_=yK5736 z{N9bZ%tl8%K_$4V^~maWgpT4QeFO7hX&e|;asowMU&I8hb6p9HRIm%i#bq6uQt5J~;t$1S0$ydU~*WuRz*} z+VtE7w|XqC+v@LjgRHW$$FM&qmL>il8ayihJVV`@Zoa0mN>W| zt=t*|utY>e==hE4MIGpC{%N+70I9vl^pnFnw&B*!b6|{*`s#ev!IFQ+(6D5W#0IvJ zOwOgAK5_#(?A=$ugPrkLg0f&PzJLEdGZT|X4u8N8)K!lwC(T%eOaT?(4O-t`&=C?6 z+Mr4V*UR)}Addc)?RCBEnx5c6&#Z+M+wjrLdLEc^XP12__d?aQ5~*uOiDREI<9y$ zr(tb92OJw_&gsGE3SSn;c#4@&iuZB?jW3dk_xub^TYZh=SYH}|HW!CO{rvR{gijs$ zyM-5yb_UHjI5>;<&+-VQBwzBpTMw$oeBLP41~NozJ8s9ItX?#8j|MEpC3JZ2rI9mqQcD$3OC@6xV37W z9Ome2GC+7LfrK9bdsQ%-7K-Y26JNdR1$)q7fzelk#(#Bgam^432ozO+1J!|4B!{(t zh(HNHE#>i5R$-^#;@6()_|5m6cN-oVkzr@QQM!XYPR4%@O`$n~#F3#w<}Dz1&PHFW zvsDyFT}AkyxTM9nh^3D%e!<3X#tOIy*-|ZUc=;|HlTwy?I|{B)5|n$C7#joklR*o% zMoXL?OL&bTSuu*T&pZ$r^TlNofaG<&D0P-FiZrwLr3D*G92vnJSm6u_O{PdOD!>9c z#2kbd4x*G49f(XI+Zf|cDmwM)QGZ;uPhPcZnk1uV1wO&AgxlD9*>?7H*5}hqcs@{zVB+kNOW}Jt!_UZZcKb1W9_pxbxe(Qd;PO3GzpC)kYiNr1{ao3&C z)yh79Cg3a>*$~&4hE{x!is>DUQ0*k5zJ@6lx-MYL2kU;B5Ob#<>-B;Sw%Hs+Hxont*!f0@tdl4eZN{E;_V1hhvwF^6gp2nHPaW zfPWhlbSi=LmYz&y>Wye{5ri(O@D>%ZH_7OhFuN*U`q}JGi`t{jnT26$7!DW+%+!Mi zR}^q(FEzuCv)jO#r{5shu&k}CTPjqVK$%Odl~tHyqt(wF8ePF!%sNw05Ra-^!XH+H zlL^4WVH9Hl^pG7UPyi|ef{=lVDmp$McC2Ff6Zjpb zfejOw7m<;))c>JgxJSU#kDras27&0p^&BiSn3ul;umL|{V_Ro6l=e5K0{ zMW%H?IX2(-y#t)Z=H})k4LI^4;*#SIU48?fAS}Gy984lC;p$bmJ5%agZuwB<0=z7qJOmM4?>!sr+c6>k>D8eynw zf3rL8JnoI|f44{DDmd?kuzKq2>CJ+J`Cvxb5%pEERu3%Jq!bj+{aIv@x6O}1=iauK z0h$Fmzs(z4RoVSB^@3XUESSl%S7@^|JjYrv!()5v2t~nEj()LHYKW(YD zoshzQ{3Lx2$4fO;+A}&ugzAc@=xnjd<;5>BuEr-NZJE5H18{((_`-Hr=6c>u0hrCs z9P=b#w$8zpk5mZ>3D|Nnm#+f6d#0kK{WtE3;1)ZZ_N43PBV`ln8_f;Ok^kumFME-U z)Ord5I|4*LOcXx)bF661jDN!oxpOV4$Iy;^s#UDZvy{X{m_w`X zHa!6?6a48Ws^8HsU?;)E_Y^4rU9&Y8xKUm>~WJH}ulLr|WSZaA_f*=OSPR7=U|1yU^t>%OaeOK82w_%nJ&)*Ap&gq&9 zryvY&_A0V&n2;C%$9*j_MbdS$=7J=?GjZ!UGnUuqo-M4p1ds^);r$?B5}$}Y4PkEk zTm&XG@R{S{<6k`%oi*u7K2wgnBY@dm2Tb<5$*WWF^S>|DhPl_U%;s7tW{m$)a8Bsq zx#>W$;AXgLU-S8cG*E;gr(m9Z5f+v)1R6Z-cD5RC4D=aXVttGcQQ6(zPDDb&%fplY zC6S%F%108T&n+f=hr}UXh!-UZG>oX zTVyRX6oSubk^@R_Vgt8HrMWH{-+!dSig9DJT(qqHCYpw3%$DS`=W`PWO`SK;P zGiD<`z(?4n75UaSyMZt(xY@%s?#`Dm)gHXIFPS7T z6&fb*AmCaG3d|bb>LrNSzk$=ZT8<4a5+CHy8PEkdXF%Ix7P9Ju6NTxjF*Ye7!52y( zxT8)~1?nD8wh(<^{#zjf4jQ)In)&VweEReWW-kzufpx;jfgdb;2U@|ajz2p~kaJnp z`ix)<0ChNi_IF=jwAD~H>}%{Xwg8a~vOH7&+Aoao2fISRK)2roblcI!*D^C+-h6;d z2mAXym!NDgf`|uU0D?t9VGubY38;CSsk{YrcQsBh|72?jm9L`HvkfpzyAEu1)UmpQ zSc|f;-96ghINkF<)xUikWQk00$YBhNfo-t)+s4jLj7~FS7ZVNO@Aon+^@jk_oJ=N2EGLW{Zz={n(@PjB`s3y zvC=u_u}V3c@O}~sitOvd*mb6PTV}Jz2#6Xo1$i077Yu5aZcLpJr zWgg9vitK_H>CBi3Y;ghW=N@<;hB8#+_|XsOo*O>cumsavt>3X%-c3-wK#K#j956Eq zzP^F;!#Iu;f4f*q_AlY#;h`7iYRkZY{}Qw^ApT*?HAYSCzB)6d5O3=h){Ld2B#;hN zpPNc)Y#`tp8o3Wh+8RW7Hw4TQOnmBz!uM2jHo%-V@jHMn^?18qlbVW(+lVmux>sf_ zDi|FG&d6UNqsLO%NI`(!<-homn_A21CnVTXQG*j`i+~QSS% zwx3@JkBA_vL4j!#c0_Z(rMLv~k(UR3rlk_(%d3_hu@Ei2Sr)ZEuqD~*NjqzqyVVc4 zSinbRGk^!b-C;&Z&u(JPmx5x1`F;mv4t##Ua3y4Uuc}phQYwY?0=S_6oXGL z$HdwS{48o}YA5?s6dLL+x2T=0tibi&{6-6RV{h*iJjC3==)HJB8~ZZ#r$7S%p|w2( z>trY}edeA5fF8>Sf|5B|3V0{{cTAfH^KsPWVC(LCxMpj4D;{joD9N zx&^$VH=sA*Az-BnMqv(5*J_jXhCRU@XF*;D`NCQSpOhYjb>ZsOXyzNJYtx|m3G>w( z5Dv|df4u+qtg#x-vKgWZFJKYkF)f2JU^rd3q@3W8<3-+3B5 z$blQK;MQB;+%&J)y*?RU_36{c(y|oLTS`*B%rs7b*YmJ2!v?=&O!Nf>iPSDZr;0j==a|Ex+)*&f z$<1ww Date: Mon, 14 Sep 2015 05:57:52 -0400 Subject: [PATCH 12/14] README. --- README.md | 317 +++++++++++++++++++----------------------------------- 1 file changed, 111 insertions(+), 206 deletions(-) diff --git a/README.md b/README.md index a82ea0f..2d14392 100644 --- a/README.md +++ b/README.md @@ -1,213 +1,118 @@ -CUDA Stream Compaction -====================== +# CUDA Stream Compaction **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 2** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +Terry Sun; Arch Linux, Intel i5-4670, GTX 750 -### (TODO: Your README) +## Library -Include analysis, etc. (Remember, this is public, so don't put -anything here that you don't want to share with the world.) +This project contains a `stream_compaction` library and some associated tests. -Instructions (delete me) -======================== +`CPU`: A CPU implementation of `scan` and `scatter`, for reference and +performance comparisons. Runs in O(n) / O(n) adds. -This is due Sunday, September 13 at midnight. - -**Summary:** In this project, you'll implement GPU stream compaction in CUDA, -from scratch. This algorithm is widely used, and will be important for -accelerating your path tracer project. - -Your stream compaction implementations in this project will simply remove `0`s -from an array of `int`s. In the path tracer, you will remove terminated paths -from an array of rays. - -In addition to being useful for your path tracer, this project is meant to -reorient your algorithmic thinking to the way of the GPU. On GPUs, many -algorithms can benefit from massive parallelism and, in particular, data -parallelism: executing the same code many times simultaneously with different -data. - -You'll implement a few different versions of the *Scan* (*Prefix Sum*) -algorithm. First, you'll implement a CPU version of the algorithm to reinforce -your understanding. Then, you'll write a few GPU implementations: "naive" and -"work-efficient." Finally, you'll use some of these to implement GPU stream -compaction. - -**Algorithm overview & details:** There are two primary references for details -on the implementation of scan and stream compaction. - -* The [slides on Parallel Algorithms](https://github.com/CIS565-Fall-2015/cis565-fall-2015.github.io/raw/master/lectures/2-Parallel-Algorithms.pptx) - for Scan, Stream Compaction, and Work-Efficient Parallel Scan. -* GPU Gems 3, Chapter 39 - [Parallel Prefix Sum (Scan) with CUDA](http://http.developer.nvidia.com/GPUGems3/gpugems3_ch39.html). - -Your GPU stream compaction implementation will live inside of the -`stream_compaction` subproject. This way, you will be able to easily copy it -over for use in your GPU path tracer. - - -## Part 0: The Usual - -This project (and all other CUDA projects in this course) requires an NVIDIA -graphics card with CUDA capability. Any card with Compute Capability 2.0 -(`sm_20`) or greater will work. Check your GPU on this -[compatibility table](https://developer.nvidia.com/cuda-gpus). -If you do not have a personal machine with these specs, you may use those -computers in the Moore 100B/C which have supported GPUs. - -**HOWEVER**: If you need to use the lab computer for your development, you will -not presently be able to do GPU performance profiling. This will be very -important for debugging performance bottlenecks in your program. - -### Useful existing code - -* `stream_compaction/common.h` - * `checkCUDAError` macro: checks for CUDA errors and exits if there were any. - * `ilog2ceil(x)`: computes the ceiling of log2(x), as an integer. -* `main.cpp` - * Some testing code for your implementations. - - -## Part 1: CPU Scan & Stream Compaction - -This stream compaction method will remove `0`s from an array of `int`s. - -In `stream_compaction/cpu.cu`, implement: - -* `StreamCompaction::CPU::scan`: compute an exclusive prefix sum. -* `StreamCompaction::CPU::compactWithoutScan`: stream compaction without using - the `scan` function. -* `StreamCompaction::CPU::compactWithScan`: stream compaction using the `scan` - function. Map the input array to an array of 0s and 1s, scan it, and use - scatter to produce the output. You will need a **CPU** scatter implementation - for this (see slides or GPU Gems chapter for an explanation). - -These implementations should only be a few lines long. - - -## Part 2: Naive GPU Scan Algorithm - -In `stream_compaction/naive.cu`, implement `StreamCompaction::Naive::scan` - -This uses the "Naive" algorithm from GPU Gems 3, Section 39.2.1. We haven't yet -taught shared memory, and you **shouldn't use it yet**. Example 39-1 uses -shared memory, but is limited to operating on very small arrays! Instead, write -this using global memory only. As a result of this, you will have to do -`ilog2ceil(n)` separate kernel invocations. - -Beware of errors in Example 39-1 in the book; both the pseudocode and the CUDA -code in the online version of Chapter 39 are known to have a few small errors -(in superscripting, missing braces, bad indentation, etc.) - -Since the parallel scan algorithm operates on a binary tree structure, it works -best with arrays with power-of-two length. Make sure your implementation works -on non-power-of-two sized arrays (see `ilog2ceil`). This requires extra memory -- your intermediate array sizes will need to be rounded to the next power of -two. - - -## Part 3: Work-Efficient GPU Scan & Stream Compaction - -### 3.1. Scan - -In `stream_compaction/efficient.cu`, implement -`StreamCompaction::Efficient::scan` - -All of the text in Part 2 applies. - -* This uses the "Work-Efficient" algorithm from GPU Gems 3, Section 39.2.2. -* Beware of errors in Example 39-2. -* Test non-power-of-two sized arrays. - -### 3.2. Stream Compaction - -This stream compaction method will remove `0`s from an array of `int`s. - -In `stream_compaction/efficient.cu`, implement -`StreamCompaction::Efficient::compact` - -For compaction, you will also need to implement the scatter algorithm presented -in the slides and the GPU Gems chapter. - -In `stream_compaction/common.cu`, implement these for use in `compact`: - -* `StreamCompaction::Common::kernMapToBoolean` -* `StreamCompaction::Common::kernScatter` - - -## Part 4: Using Thrust's Implementation - -In `stream_compaction/thrust.cu`, implement: - -* `StreamCompaction::Thrust::scan` - -This should be a very short function which wraps a call to the Thrust library -function `thrust::exclusive_scan(first, last, result)`. - -To measure timing, be sure to exclude memory operations by passing -`exclusive_scan` a `thrust::device_vector` (which is already allocated on the -GPU). You can create a `thrust::device_vector` by creating a -`thrust::host_vector` from the given pointer, then casting it. - - -## Part 5: Radix Sort (Extra Credit) (+10) - -Add an additional module to the `stream_compaction` subproject. Implement radix -sort using one of your scan implementations. Add tests to check its correctness. - - -## Write-up - -1. Update all of the TODOs at the top of this README. -2. Add a description of this project including a list of its features. -3. Add your performance analysis (see below). - -All extra credit features must be documented in your README, explaining its -value (with performance comparison, if applicable!) and showing an example how -it works. For radix sort, show how it is called and an example of its output. - -Always profile with Release mode builds and run without debugging. - -### Questions - -* Roughly optimize the block sizes of each of your implementations for minimal - run time on your GPU. - * (You shouldn't compare unoptimized implementations to each other!) - -* Compare all of these GPU Scan implementations (Naive, Work-Efficient, and - Thrust) to the serial CPU version of Scan. Plot a graph of the comparison - (with array size on the independent axis). - * You should use CUDA events for timing. Be sure **not** to include any - explicit memory operations in your performance measurements, for - comparability. - * To guess at what might be happening inside the Thrust implementation, take - a look at the Nsight timeline for its execution. - -* Write a brief explanation of the phenomena you see here. - * Can you find the performance bottlenecks? Is it memory I/O? Computation? Is - it different for each implementation? - -* Paste the output of the test program into a triple-backtick block in your - README. - * If you add your own tests (e.g. for radix sort or to test additional corner - cases), be sure to mention it explicitly. - -These questions should help guide you in performance analysis on future -assignments, as well. - -## Submit - -If you have modified any of the `CMakeLists.txt` files at all (aside from the -list of `SOURCE_FILES`), you must test that your project can build in Moore -100B/C. Beware of any build issues discussed on the Google Group. - -1. Open a GitHub pull request so that we can see that you have finished. - The title should be "Submission: YOUR NAME". -2. Send an email to the TA (gmail: kainino1+cis565@) with: - * **Subject**: in the form of `[CIS565] Project 2: PENNKEY` - * Direct link to your pull request on GitHub - * In the form of a grade (0-100+) with comments, evaluate your own - performance on the project. - * Feedback on the project itself, if any. +`Naive`: A naive (non-work-efficient) implementation of `scan`, performing O(n) +adds and O(logn) iterations. + +`Efficient`: A work-efficient implementation of `scan` and `compact`. Also +contins `dv_scan`, the actual in-place scan implementation which takes a device +memory pointer directly (useful for other CUDA functions which need scan, +bypassing the need to generate the host-memory-pointers that `Efficient::scan` +would take). Performs O(nlogn) adds and runs 2logn iterations. + +`Common`: +* `kernMapToBooleans`, used as the `filter`ing function in `Efficient::compact`. +* `kernScatter`, used in `Efficient::compact` and `Radix::sort`. + +`Radix`: `sort` is so close to working but... doesn't work :( + +## Performance Analysis + +I did performance analysis with `CudaEvent`s for the GPU algorithm +implementations and `std::chrono` for the CPU implementations. As before, code +for this can be found on the `performance` (to avoid cluttering the main +codebase). Raw data (csv format) can be found in `data/`. + +### Some fun charts + +Measuring the performance of scan with a block size of 128 (where applicable). + +![](data/scan_perf_zoomed_out.png) + +I cut the top of the CPU line off and my chart is still horribly skewed. Let's +try again: + +![](data/scan_perf_zoomed_in.png) + +Interestingly, the sharp(ish) increase in `thrust::scan` around N=14 is +consistent between runs. Maybe it has to do with an increase in memory +allocation around that size. + +`Naive` performs about twice as well as `Efficient`, which makes sense as the +work-efficient scan takes twice as many iterations of kernel calls. I suspect a +smarter method of spawning threads (only creating as many as you need instead of +creating 2^N every time and only using a subset) would improve performance on +the efficient algorithm, as it would result in more threads having the exact +same sequence of instructions to be executed. I think the performance gain on +efficient might be greater than `Naive` in this case because the `Efficient` +algorithm uses more iterations but fewer threads in each case, which would +explain why having a work-efficient algorithm is preferable. (I was planning +on testing this but -- as you can see -- I ran out of time.) + +There's a small amount of moving memory from the device to host in +`Efficient::scan` - I don't if that has an appreciable impact, since it only +needs to copy `sizeof(int)`. `Efficient::compact` has even more memory copying +to retrieve the size of the compacted stream. + +![](data/gpu_by_block_size.png) + +Tested on an array size of 2^16. `Naive::scan` and `Efficient::scan` are both +roughly optimal at a block size of 128. + +The performance of `Efficient::compact` is dominated by `Efficient::scan`. The +only other computation happening in `compact` is `kernMapToBoolean` and +`kernScatter`, both of which are constant (in fact, 1 operation per thread), and +memory copying (see above). + +Compact performance goes much the same way, to nobody's surprise: + +![](data/compact_by_array_size.png) + + +## Test output + +``` +**************** +** SCAN TESTS ** +**************** + [ 33 36 27 15 43 35 36 42 49 21 12 27 40 ... 28 0 ] +==== cpu scan, power-of-two ==== + [ 0 33 69 96 111 154 189 225 267 316 337 349 376 ... 6371 6399 ] +==== cpu scan, non-power-of-two ==== + [ 0 33 69 96 111 154 189 225 267 316 337 349 376 ... 6329 6330 ] + passed +==== naive scan, power-of-two ==== + [ 0 33 69 96 111 154 189 225 267 316 337 349 376 ... 6371 6399 ] + passed +==== naive scan, non-power-of-two ==== + passed +==== work-efficient scan, power-of-two ==== + [ 0 33 69 96 111 154 189 225 267 316 337 349 376 ... 6371 6399 ] + passed +==== work-efficient scan, non-power-of-two ==== + [ 0 33 69 96 111 154 189 225 267 316 337 349 376 ... 6329 6330 ] + passed +==== thrust scan, power-of-two ==== + passed +==== thrust scan, non-power-of-two ==== + passed + +***************************** +** STREAM COMPACTION TESTS ** +***************************** + [ 1 0 1 1 1 1 0 0 1 1 0 1 0 ... 0 0 ] +==== work-efficient compact, power-of-two ==== + passed +==== work-efficient compact, non-power-of-two ==== + passed +``` From 7df52e127b7a33827863e296125e165aa66e544c Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Mon, 14 Sep 2015 06:21:24 -0400 Subject: [PATCH 13/14] Radix sort... doesn't work. --- src/testing_helpers.hpp | 17 ++++ stream_compaction/CMakeLists.txt | 2 + stream_compaction/radix.cu | 147 +++++++++++++++++++++++++++++++ stream_compaction/radix.h | 7 ++ 4 files changed, 173 insertions(+) create mode 100644 stream_compaction/radix.cu create mode 100644 stream_compaction/radix.h diff --git a/src/testing_helpers.hpp b/src/testing_helpers.hpp index f6b572f..4c93526 100644 --- a/src/testing_helpers.hpp +++ b/src/testing_helpers.hpp @@ -33,6 +33,23 @@ void printCmpLenResult(int n, int expN, T *a, T *b) { cmpArrays(n, a, b) ? "FAIL VALUE" : "passed"); } +template +int testArrayOrder(int n, T *a) { + for (int i = 0; i < n-1; i++) { + if (a[i] < a[i+1]) { + printf(" (a[%d] = %d) < (a[%d] = %d)\n", i, a[i], i+1, a[i+1]); + return 1; + } + } + return 0; +} + +template +void printArrayOrderResult(int n, T *a) { + printf(" %s \n", + testArrayOrder(n, a) ? "FAIL VALUE" : "passed"); +} + void zeroArray(int n, int *a) { for (int i = 0; i < n; i++) { a[i] = 0; diff --git a/stream_compaction/CMakeLists.txt b/stream_compaction/CMakeLists.txt index cdbef77..bcc484e 100644 --- a/stream_compaction/CMakeLists.txt +++ b/stream_compaction/CMakeLists.txt @@ -9,6 +9,8 @@ set(SOURCE_FILES "efficient.cu" "thrust.h" "thrust.cu" + "radix.h" + "radix.cu" ) cuda_add_library(stream_compaction diff --git a/stream_compaction/radix.cu b/stream_compaction/radix.cu new file mode 100644 index 0000000..2369c2d --- /dev/null +++ b/stream_compaction/radix.cu @@ -0,0 +1,147 @@ +#include +#include +#include "common.h" +#include "radix.h" +#include "efficient.h" + +namespace StreamCompaction { +namespace Radix { + +/* + * Get INVERTED `d`th digit of odata[k]. + */ +__global__ void kDigit(int n, int d, int *dv_odata, const int *dv_idata) { + int k = threadIdx.x; + if (k >= n) { return; } + dv_odata[k] = (dv_idata[k] & (1 << d)) > 0 ? 1 : 0; +} + +__global__ void kInvert(int n, int *odata, const int *idata) { + int k = threadIdx.x; + if (k >= n) { return; } + odata[k] = idata[k] == 0 ? 1 : 0; +} + +__global__ void kMapToIndex(int n, int *odata, int *b, int *f_indices, int pivot) { + int k = threadIdx.x; + if (k >= n) { return; } + odata[k] = (b[k] == 1) ? (k - f_indices[k] + pivot) : f_indices[k]; +} + +/* + * Implement split on device memory. + * Returns totalFalses (eg. the split point). + */ +__host__ int split(int n, int d, int *dv_odata, int *dv_idata) { + printf("---- split %d %d ----\n", n, d); + int array_size = n * sizeof(int); + int *TMP = (int*)malloc(array_size); + + int *b; + int *e; + int *t; + int *indices; + cudaMalloc((void**) &b, array_size); + cudaMalloc((void**) &e, array_size); + cudaMalloc((void**) &t, array_size); + cudaMalloc((void**) &indices, array_size); + + kDigit<<<1, n>>>(n, d, b, dv_idata); // b + printf("b: "); + cudaMemcpy(TMP, b, array_size, cudaMemcpyDeviceToHost); + for (int i = 0; i < n; i++) { printf("%d\t", TMP[i]); } printf("\n"); + kInvert<<<1, n>>>(n, e, b); // e + printf("e: "); + cudaMemcpy(TMP, e, array_size, cudaMemcpyDeviceToHost); + for (int i = 0; i < n; i++) { printf("%d\t", TMP[i]); } printf("\n"); + + int lastElt; + cudaMemcpy(&lastElt, e + n-1, sizeof(int), cudaMemcpyDeviceToHost); + + StreamCompaction::Efficient::dv_scan(n, e); // f IN PLACE OF e + + int totalFalses; + cudaMemcpy(&totalFalses, e + n-1, sizeof(int), cudaMemcpyDeviceToHost); + totalFalses += lastElt; + + printf("totalFalses = %d\n", totalFalses); + + kMapToIndex<<<1, n>>>(n, indices, b, e, totalFalses); + printf("indices: "); + cudaMemcpy(TMP, indices, array_size, cudaMemcpyDeviceToHost); + for (int i = 0; i < n; i++) { printf("%d\t", TMP[i]); } printf("\n"); + + StreamCompaction::Common::kernScatter<<<1, n>>>(n, dv_odata, indices, dv_idata); // scatter + printf("scattered: "); + cudaMemcpy(TMP, dv_odata, array_size, cudaMemcpyDeviceToHost); + for (int i = 0; i < n; i++) { printf("%d\t", TMP[i]); } printf("\n"); + + cudaFree(b); + return totalFalses; +} + +int testArrayOrder(int n, int *a) { + for (int i = 0; i < n-1; i++) { + if (a[i] > a[i+1]) { + return 1; + } + } + return 0; +} + +/* + * odata and idata are device memory points. + */ +__host__ void sortRecursive(int n, int d, int dmax, int *odata, int *idata) { + if (d >= dmax) { return; } + int pivot = split(n, d, odata, idata); + //sortRecursive(n, d+1, dmax, odata, odata); + //if (pivot != 0) { + // sortRecursive(pivot, d+1, dmax, odata, odata); + //} + //if (pivot != n) { + // sortRecursive(n-pivot, d+1, dmax, odata+n, odata+n); + //} +} + +__host__ void sortRecursive2(int n, int d, int dmax, int *odata, int *idata) { + if (d <= 0) { return; } + int pivot = split(n, d, odata, idata); + if (pivot != 0) { + sortRecursive(pivot, d-1, dmax, odata, odata); + } + if (pivot != n) { + sortRecursive(n-pivot, d-1, dmax, odata+n, odata+n); + } +} + +__host__ void sort(int n, int *odata, const int *idata) { + int max = idata[0]; + for (int i = 0; i < n; i++) { + if (idata[i] > max) { + max = idata[i]; + } + } + int maxDigits = ilog2ceil(max); + + int *dv_odata; + int *dv_idata; + int array_size = n * sizeof(int); + + cudaMalloc((void**) &dv_odata, array_size); + cudaMalloc((void**) &dv_idata, array_size); + cudaMemcpy(dv_idata, idata, array_size, cudaMemcpyHostToDevice); + + //sortRecursive(n, 0, maxDigits, dv_odata, dv_idata); + sortRecursive2(n, 0, maxDigits, dv_odata, dv_idata); + + cudaMemcpy(odata, dv_odata, array_size, cudaMemcpyDeviceToHost); + + //for (int i = 0; i < n; i++) { printf("%d\t%d\n", idata[i], odata[i]); } + + cudaFree(dv_odata); + cudaFree(dv_idata); +} + +} +} diff --git a/stream_compaction/radix.h b/stream_compaction/radix.h new file mode 100644 index 0000000..57d21d8 --- /dev/null +++ b/stream_compaction/radix.h @@ -0,0 +1,7 @@ +#pragma once + +namespace StreamCompaction { +namespace Radix { + void sort(int n, int *odata, const int *idata); +} +} From 5194a8849c7156051b783cd2a3c0b01d4ad9c3c5 Mon Sep 17 00:00:00 2001 From: Terry Sun Date: Mon, 14 Sep 2015 06:22:33 -0400 Subject: [PATCH 14/14] Performance measuring (via CudaEvent and std::chrono) --- src/main.cpp | 213 ++++++++++++++++++++++++++++----- stream_compaction/efficient.cu | 52 ++++++-- stream_compaction/efficient.h | 5 +- stream_compaction/naive.cu | 43 +++++-- stream_compaction/naive.h | 2 +- stream_compaction/thrust.cu | 15 ++- stream_compaction/thrust.h | 2 +- 7 files changed, 274 insertions(+), 58 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 675da35..90ef5cf 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,20 +6,148 @@ * @copyright University of Pennsylvania */ +#include #include + #include #include #include #include +#include #include "testing_helpers.hpp" +#define BENCHMARK + +void benchmarkCPU() { + const int iterations = 100; + int totalScan = 0; + int totalCompactWithout = 0; + int totalCompactWith = 0; + printf("size, scan, compactWithoutScan, compactWithScan\n"); + for (int s = 4; s < 20; s++) { + int SIZE = 1 << s; + int a[SIZE]; + int b[SIZE]; + + for (int i = 0; i < iterations; i++) { + genArray(SIZE - 1, a, 50); // Leave a 0 at the end to test that edge case + a[SIZE - 1] = 0; + + auto begin = std::chrono::high_resolution_clock::now(); + StreamCompaction::CPU::scan(SIZE, b, a); + auto end = std::chrono::high_resolution_clock::now(); + int diff = std::chrono::duration_cast(end-begin).count(); + totalScan += diff; + + begin = std::chrono::high_resolution_clock::now(); + StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); + end = std::chrono::high_resolution_clock::now(); + diff = std::chrono::duration_cast(end-begin).count(); + totalCompactWithout += diff; + + begin = std::chrono::high_resolution_clock::now(); + StreamCompaction::CPU::compactWithScan(SIZE, b, a); + end = std::chrono::high_resolution_clock::now(); + diff = std::chrono::duration_cast(end-begin).count(); + totalCompactWith += diff; + } + printf("%d, %f, %f, %f\n", s, + (float)totalScan / iterations / 1000.0, + (float)totalCompactWithout / iterations / 1000.0, + (float)totalCompactWith / iterations / 1000.0 + ); + } +} + +void benchmarkGPUBlockSize() { + const int iterations = 100; + printf("block size, naive::scan, efficient::scan, efficient::compact\n"); + int SIZE = 1 << 16; + int a[SIZE]; + int b[SIZE]; + for (int block = 2; block < 11; block++) { + int blockSize = 1 << block; + + float totalNaive = 0; + float totalEfficientScan = 0; + float totalEfficientCompact = 0; + for (int i = 0; i < iterations; i++) { + genArray(SIZE - 1, a, 50); + a[SIZE - 1] = 0; + zeroArray(SIZE, b); + + float timeElapsed; + + StreamCompaction::Naive::scan(SIZE, b, a, &timeElapsed, blockSize); + totalNaive += timeElapsed; + + StreamCompaction::Efficient::scan(SIZE, b, a, &timeElapsed, blockSize); + totalEfficientScan += timeElapsed; + + StreamCompaction::Efficient::compact(SIZE, b, a, &timeElapsed, blockSize); + totalEfficientCompact += timeElapsed; + } + printf("%d, %f, %f, %f\n", block, + totalNaive / iterations, + totalEfficientScan / iterations, + totalEfficientCompact / iterations + ); + } +} + +void benchmarkGPUArraySize() { + const int iterations = 100; + printf("block size, naive::scan, efficient::scan, efficient::compact, thrust::scan\n"); + for (int s = 4; s < 20; s++) { + int SIZE = 1 << s; + int a[SIZE]; + int b[SIZE]; + + int blockSize = 1 << 7; + + float totalNaive = 0; + float totalEfficientScan = 0; + float totalEfficientCompact = 0; + float totalThrust = 0; + for (int i = 0; i < iterations; i++) { + genArray(SIZE - 1, a, 50); + a[SIZE - 1] = 0; + zeroArray(SIZE, b); + + float timeElapsed; + + StreamCompaction::Naive::scan(SIZE, b, a, &timeElapsed, blockSize); + totalNaive += timeElapsed; + + StreamCompaction::Efficient::scan(SIZE, b, a, &timeElapsed, blockSize); + totalEfficientScan += timeElapsed; + + StreamCompaction::Efficient::compact(SIZE, b, a, &timeElapsed, blockSize); + totalEfficientCompact += timeElapsed; + + StreamCompaction::Thrust::scan(SIZE, b, a, &timeElapsed); + totalThrust += timeElapsed; + } + printf("%d, %f, %f, %f, %f\n", s, + totalNaive / iterations, + totalEfficientScan / iterations, + totalEfficientCompact / iterations, + totalThrust / iterations + ); + } +} + int main(int argc, char* argv[]) { +#ifdef BENCHMARK + benchmarkCPU(); + benchmarkGPUBlockSize(); + benchmarkGPUArraySize(); +#else const int SIZE = 1 << 8; - const int NPOT = SIZE - 3; + //const int SIZE = 4; + const int NPOT_SIZE = SIZE - 3; int a[SIZE], b[SIZE], c[SIZE]; - // Scan tests - printf("\n"); printf("****************\n"); printf("** SCAN TESTS **\n"); @@ -36,33 +164,33 @@ int main(int argc, char* argv[]) { zeroArray(SIZE, c); printDesc("cpu scan, non-power-of-two"); - StreamCompaction::CPU::scan(NPOT, c, a); - printArray(NPOT, b, true); - printCmpResult(NPOT, b, c); + StreamCompaction::CPU::scan(NPOT_SIZE, c, a); + printArray(NPOT_SIZE, b, true); + printCmpResult(NPOT_SIZE, b, c); zeroArray(SIZE, c); printDesc("naive scan, power-of-two"); StreamCompaction::Naive::scan(SIZE, c, a); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("naive scan, non-power-of-two"); - StreamCompaction::Naive::scan(NPOT, c, a); + StreamCompaction::Naive::scan(NPOT_SIZE, c, a); //printArray(SIZE, c, true); - printCmpResult(NPOT, b, c); + printCmpResult(NPOT_SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, power-of-two"); StreamCompaction::Efficient::scan(SIZE, c, a); - //printArray(SIZE, c, true); + printArray(SIZE, c, true); printCmpResult(SIZE, b, c); zeroArray(SIZE, c); printDesc("work-efficient scan, non-power-of-two"); - StreamCompaction::Efficient::scan(NPOT, c, a); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + StreamCompaction::Efficient::scan(NPOT_SIZE, c, a); + printArray(NPOT_SIZE, c, true); + printCmpResult(NPOT_SIZE, b, c); zeroArray(SIZE, c); printDesc("thrust scan, power-of-two"); @@ -72,42 +200,40 @@ int main(int argc, char* argv[]) { zeroArray(SIZE, c); printDesc("thrust scan, non-power-of-two"); - StreamCompaction::Thrust::scan(NPOT, c, a); - //printArray(NPOT, c, true); - printCmpResult(NPOT, b, c); + StreamCompaction::Thrust::scan(NPOT_SIZE, c, a); + //printArray(NPOT_SIZE, c, true); + printCmpResult(NPOT_SIZE, b, c); printf("\n"); printf("*****************************\n"); printf("** STREAM COMPACTION TESTS **\n"); printf("*****************************\n"); - // Compaction tests - - genArray(SIZE - 1, a, 4); // Leave a 0 at the end to test that edge case + genArray(SIZE - 1, a, 2); // Leave a 0 at the end to test that edge case a[SIZE - 1] = 0; printArray(SIZE, a, true); - int count, expectedCount, expectedNPOT; + int count, expectedCount, expectedNPOT_SIZE; zeroArray(SIZE, b); - printDesc("cpu compact without scan, power-of-two"); +// printDesc("cpu compact without scan, power-of-two"); count = StreamCompaction::CPU::compactWithoutScan(SIZE, b, a); expectedCount = count; - printArray(count, b, true); - printCmpLenResult(count, expectedCount, b, b); +// printArray(count, b, true); +// printCmpLenResult(count, expectedCount, b, b); zeroArray(SIZE, c); - printDesc("cpu compact without scan, non-power-of-two"); - count = StreamCompaction::CPU::compactWithoutScan(NPOT, c, a); - expectedNPOT = count; - printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); +// printDesc("cpu compact without scan, non-power-of-two"); + count = StreamCompaction::CPU::compactWithoutScan(NPOT_SIZE, c, a); + expectedNPOT_SIZE = count; +// printArray(count, c, true); +// printCmpLenResult(count, expectedNPOT_SIZE, b, c); zeroArray(SIZE, c); - printDesc("cpu compact with scan"); +// printDesc("cpu compact with scan"); count = StreamCompaction::CPU::compactWithScan(SIZE, c, a); - printArray(count, c, true); - printCmpLenResult(count, expectedCount, b, c); +// printArray(count, c, true); +// printCmpLenResult(count, expectedCount, b, c); zeroArray(SIZE, c); printDesc("work-efficient compact, power-of-two"); @@ -117,7 +243,28 @@ int main(int argc, char* argv[]) { zeroArray(SIZE, c); printDesc("work-efficient compact, non-power-of-two"); - count = StreamCompaction::Efficient::compact(NPOT, c, a); + count = StreamCompaction::Efficient::compact(NPOT_SIZE, c, a); //printArray(count, c, true); - printCmpLenResult(count, expectedNPOT, b, c); + printCmpLenResult(count, expectedNPOT_SIZE, b, c); + + printf("\n"); + printf("**********************\n"); + printf("** RADIX SORT TESTS **\n"); + printf("**********************\n"); + + genArray(SIZE - 1, a, 3); // Leave a 0 at the end to test that edge case + a[0] = 0; + a[1] = 2; + a[2] = 3; + a[3] = 1; + //a = { 0, 1, 2, 3 }; + //a[SIZE - 1] = 0; + printArray(SIZE, a, true); + + zeroArray(SIZE, c); + printDesc("radix sort, power-of-two"); + StreamCompaction::Radix::sort(SIZE, c, a); + printArray(SIZE, c, true); + printArrayOrderResult(SIZE, c); +#endif } diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu index 41e9125..2fb27f5 100644 --- a/stream_compaction/efficient.cu +++ b/stream_compaction/efficient.cu @@ -6,8 +6,10 @@ namespace StreamCompaction { namespace Efficient { +int BLOCK_SIZE = (2 << 7); + __global__ void kUpSweep(int d, int *data) { - int k = threadIdx.x; + int k = (blockDim.x * blockIdx.x) + threadIdx.x; int exp_d = (int)exp2f(d); int exp_d1 = (int)exp2f(d+1); if (k % exp_d1 == 0) { @@ -16,7 +18,7 @@ __global__ void kUpSweep(int d, int *data) { } __global__ void kDownSweep(int d, int *data) { - int k = threadIdx.x; + int k = (blockDim.x * blockIdx.x) + threadIdx.x; if (k % (int)exp2f(d+1) == 0) { int left = k + (int)exp2f(d) - 1; int right = k + (int)exp2f(d+1) - 1; @@ -30,22 +32,27 @@ __global__ void kDownSweep(int d, int *data) { * In-place scan on `dev_idata`, which must be a device memory pointer. */ void dv_scan(int n, int *dev_idata) { + int numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE; + for (int d = 0; d < ilog2ceil(n)-1; d++) { - kUpSweep<<<1, n>>>(d, dev_idata); + kUpSweep<<>>(d, dev_idata); + checkCUDAError("scan"); } int z = 0; cudaMemcpy(&dev_idata[n-1], &z, sizeof(int), cudaMemcpyHostToDevice); for (int d = ilog2ceil(n)-1; d >= 0; d--) { - kDownSweep<<<1, n>>>(d, dev_idata); + kDownSweep<<>>(d, dev_idata); + checkCUDAError("scan"); } } /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int size, int *odata, const int *input) { +void scan(int size, int *odata, const int *input, float *time, int blockSize) { + BLOCK_SIZE = blockSize; int *idata; int n; @@ -62,14 +69,26 @@ void scan(int size, int *odata, const int *input) { memcpy(idata, input, n * sizeof(int)); } - int *dv_idata; + int array_size = n * sizeof(int); + int *dv_idata; cudaMalloc((void**) &dv_idata, array_size); cudaMemcpy(dv_idata, idata, array_size, cudaMemcpyHostToDevice); + cudaEvent_t begin, end; + cudaEventCreate(&begin); + cudaEventCreate(&end); + cudaEventRecord(begin, 0); + dv_scan(n, dv_idata); + cudaEventRecord(end, 0); + cudaEventSynchronize(end); + cudaEventElapsedTime(time, begin, end); + cudaEventDestroy(begin); + cudaEventDestroy(end); + cudaMemcpy(odata, dv_idata, array_size, cudaMemcpyDeviceToHost); cudaFree(dv_idata); } @@ -83,7 +102,9 @@ void scan(int size, int *odata, const int *input) { * @param idata The array of elements to compact. * @returns The number of elements remaining after compaction. */ -int compact(int size, int *odata, const int *input) { +int compact(int size, int *odata, const int *input, float *time, int blockSize) { + BLOCK_SIZE = blockSize; + int *idata; int n; @@ -104,6 +125,7 @@ int compact(int size, int *odata, const int *input) { int *dev_odata; int *dev_idata; int array_size = n * sizeof(int); + int numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE; cudaMalloc((void**) &dev_indices, array_size); cudaMalloc((void**) &dev_odata, array_size); @@ -111,7 +133,12 @@ int compact(int size, int *odata, const int *input) { cudaMalloc((void**) &dev_idata, array_size); cudaMemcpy(dev_idata, idata, array_size, cudaMemcpyHostToDevice); - StreamCompaction::Common::kernMapToBoolean<<<1, n>>>(n, dev_indices, dev_idata); + cudaEvent_t begin, end; + cudaEventCreate(&begin); + cudaEventCreate(&end); + cudaEventRecord(begin, 0); + + StreamCompaction::Common::kernMapToBoolean<<>>(n, dev_indices, dev_idata); int last; cudaMemcpy(&last, dev_indices + n-1, sizeof(int), cudaMemcpyDeviceToHost); @@ -120,7 +147,14 @@ int compact(int size, int *odata, const int *input) { int streamSize; cudaMemcpy(&streamSize, dev_indices + n-1, sizeof(int), cudaMemcpyDeviceToHost); - StreamCompaction::Common::kernScatter<<<1, n>>>(n, dev_odata, dev_indices, dev_idata); + StreamCompaction::Common::kernScatter<<>>(n, dev_odata, dev_indices, dev_idata); + + cudaEventRecord(end, 0); + cudaEventSynchronize(end); + cudaEventElapsedTime(time, begin, end); + cudaEventDestroy(begin); + cudaEventDestroy(end); + cudaMemcpy(odata, dev_odata, array_size, cudaMemcpyDeviceToHost); // The kernel always copies the last elt. diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h index 395ba10..76fbc36 100644 --- a/stream_compaction/efficient.h +++ b/stream_compaction/efficient.h @@ -2,8 +2,9 @@ namespace StreamCompaction { namespace Efficient { - void scan(int n, int *odata, const int *idata); + void dv_scan(int n, int *dev_idata); + void scan(int size, int *odata, const int *input, float *time, int blockSize); - int compact(int n, int *odata, const int *idata); + int compact(int n, int *odata, const int *idata, float *time, int blockSize); } } diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu index a447c5f..ba0aa94 100644 --- a/stream_compaction/naive.cu +++ b/stream_compaction/naive.cu @@ -7,7 +7,7 @@ namespace StreamCompaction { namespace Naive { __global__ void kScan(int d, int *odata, const int *idata) { - int k = threadIdx.x; + int k = (blockDim.x * blockIdx.x) + threadIdx.x; if (k >= (int)exp2f(d-1)) { odata[k] = idata[k - (int)exp2f(d-1)] + idata[k]; } else { @@ -15,13 +15,25 @@ __global__ void kScan(int d, int *odata, const int *idata) { } } +__global__ void kShift(int n, int *odata, int *idata) { + int k = (blockDim.x * blockIdx.x) + threadIdx.x; + if (k >= n) { return; } + if (k == 0) { + odata[0] = 0; + } else { + odata[k] = idata[k-1]; + } +} + /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -__host__ void scan(int n, int *odata, const int *idata) { +__host__ void scan(int n, int *odata, const int *idata, float *time, int blockSize) { + int array_size = n * sizeof(int); + int numBlocks = (n + blockSize - 1) / blockSize; + int *A; int *B; - int array_size = n * sizeof(int); cudaMalloc((void**) &A, array_size); cudaMalloc((void**) &B, array_size); @@ -29,21 +41,30 @@ __host__ void scan(int n, int *odata, const int *idata) { int *in; int *out; + + cudaEvent_t begin, end; + cudaEventCreate(&begin); + cudaEventCreate(&end); + + cudaEventRecord(begin, 0); + for (int d = 1; d < ilog2ceil(n)+1; d++) { in = (d % 2 == 1) ? A : B; out = (d % 2 == 1) ? B : A; - kScan<<<1, n>>>(d, out, in); + kScan<<>>(d, out, in); checkCUDAError("scan"); - cudaDeviceSynchronize(); } - cudaMemcpy(odata, out, array_size, cudaMemcpyDeviceToHost); - // shift odata to the right for exclusive scan - for (int i = n-1; i >= 0; i--) { - odata[i+1] = odata[i]; - } - odata[0] = 0; + kShift<<>>(n, in, out); + + cudaEventRecord(end, 0); + cudaEventSynchronize(end); + cudaEventElapsedTime(time, begin, end); + cudaEventDestroy(begin); + cudaEventDestroy(end); + + cudaMemcpy(odata, in, array_size, cudaMemcpyDeviceToHost); cudaFree(A); cudaFree(B); diff --git a/stream_compaction/naive.h b/stream_compaction/naive.h index 21152d6..3c8578d 100644 --- a/stream_compaction/naive.h +++ b/stream_compaction/naive.h @@ -2,6 +2,6 @@ namespace StreamCompaction { namespace Naive { - void scan(int n, int *odata, const int *idata); + void scan(int n, int *odata, const int *idata, float *time, int blockSize); } } diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu index 10c3cfb..9791b97 100644 --- a/stream_compaction/thrust.cu +++ b/stream_compaction/thrust.cu @@ -12,10 +12,23 @@ namespace Thrust { /** * Performs prefix-sum (aka scan) on idata, storing the result into odata. */ -void scan(int n, int *odata, const int *idata) { +void scan(int n, int *odata, const int *idata, float *time) { thrust::device_vector ivec(idata, idata+n); thrust::device_vector ovec(odata, odata+n); + + cudaEvent_t begin, end; + cudaEventCreate(&begin); + cudaEventCreate(&end); + cudaEventRecord(begin, 0); + thrust::exclusive_scan(ivec.begin(), ivec.end(), ovec.begin()); + + cudaEventRecord(end, 0); + cudaEventSynchronize(end); + cudaEventElapsedTime(time, begin, end); + cudaEventDestroy(begin); + cudaEventDestroy(end); + thrust::copy(ovec.begin(), ovec.end(), odata); } diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h index 06707f3..4408a15 100644 --- a/stream_compaction/thrust.h +++ b/stream_compaction/thrust.h @@ -2,6 +2,6 @@ namespace StreamCompaction { namespace Thrust { - void scan(int n, int *odata, const int *idata); + void scan(int n, int *odata, const int *idata, float *time); } }