diff --git a/transitive_closure/CMakeLists.txt b/transitive_closure/CMakeLists.txt index ebf5b0f..a410232 100644 --- a/transitive_closure/CMakeLists.txt +++ b/transitive_closure/CMakeLists.txt @@ -1,2 +1,2 @@ -add_executable(transitive_closure transitive_closure.cpp timestamps.cpp) +add_executable(transitive_closure main.cpp timestamps.cpp graph.cpp) target_link_libraries(transitive_closure fmt::fmt spdlog::spdlog CLI11::CLI11) \ No newline at end of file diff --git a/transitive_closure/graph.cpp b/transitive_closure/graph.cpp new file mode 100644 index 0000000..9ea7b48 --- /dev/null +++ b/transitive_closure/graph.cpp @@ -0,0 +1,192 @@ +// Inspired by: https://www.geeksforgeeks.org/how-to-create-a-random-graph-in-c/ + +// Why to linearize: https://community.khronos.org/t/matrix-multiplication-using-2d-std-vector/106457#:~:text=On%20a%20more,be%20known%20statically. + +#include +#include +#include +#include "graph.h" + +// Function to generate a directed graph that has no loops and no duplicate edges. The resuling graph is +// stored in a 1-D vector. +// Resizing graph is done in main. +int generate_simple_directed_linear(std::vector & graph, int edges, int vertices) +{ + if (edges > vertices * (vertices - 1)) { + spdlog::warn("Too many edges."); + return -1; + } + + std::vector> edge(edges, std::vector(2)); + int i = 0; + + // Generate directed graph. + while (i < edges) { + edge[i][0] = rand() % vertices; + edge[i][1] = rand() % vertices; + + // Discard loop. + if (edge[i][0] == edge[i][1]) { + continue; + } + // Discard duplicate edge. + else { + for (int j = 0; j < i; j++) { + if (( + edge[i][0] == edge[j][0] && + edge[i][1] == edge[j][1] + )) + { + i--; + } + } + } + i++; + } + + // Initialize linear adjacency matrix. + int vertex_one, vertex_two; + for (int j = 0; j < edges; j++) { + vertex_one = edge[j][0]; + vertex_two = edge[j][1]; + graph[vertex_one * vertices + vertex_two] = 1; + } + + return 1; +} + +// Generate directed graph provided pointer. 
+int generate_simple_directed_linear(int *graph, int edges, int vertices) +{ + if (edges > vertices * (vertices - 1)) { + spdlog::warn("Too many edges."); + return -1; + } + + std::vector> edge(edges, std::vector(2)); + int i = 0; + + // Generate directed graph. + while (i < edges) { + edge[i][0] = rand() % vertices; + edge[i][1] = rand() % vertices; + + // Discard loop. + if (edge[i][0] == edge[i][1]) { + continue; + } + // Discard duplicate edge. + else { + for (int j = 0; j < i; j++) { + if (( + edge[i][0] == edge[j][0] && + edge[i][1] == edge[j][1] + )) + { + i--; + } + } + } + i++; + } + + // Initialize linear adjacency matrix. + int vertex_one, vertex_two; + for (int j = 0; j < edges; j++) { + vertex_one = edge[j][0]; + vertex_two = edge[j][1]; + graph[vertex_one * vertices + vertex_two] = 1; + } + + return 1; +} + + + + + + +// #### DEPRECATED ##### +// +// This function generates a directed graph that has no loops and no duplicate edges. The resulting graph +// is stored in a 2-D vector. +int generate_simple_directed_graph(std::vector> & adj_matrix, int num_edge, int num_vertex) +{ + // Maximum number of edges = n x (n - 1) + if (num_edge > num_vertex * (num_vertex - 1)) { + std::cout << "Too many edges." << std::endl; + return -1; + } + + // Resizing adj_matrix (num_vertex by num_vertex) is now done in main. + //adj_matrix.resize(num_vertex, std::vector(num_vertex, 0)); + + int edge[num_edge][2]; + int i = 0; + // Generate directed graph. + while (i < num_edge) { + // Generate the vertices connected by edge. + edge[i][0] = rand() % num_vertex; + edge[i][1] = rand() % num_vertex; + + // Discard loop. + if (edge[i][0] == edge[i][1]) { + continue; + } + // Discard duplicate edge. + else { + for (int j = 0; j < i; j++) { + if (( + edge[i][0] == edge[j][0] && + edge[i][1] == edge[j][1] + )) + { + i--; + } + } + } + i++; + } + + // Initialize adjacency matrix. 
+ int vertex_one, vertex_two; + for (int j = 0; j < num_edge; j++) { + // Set entries with connecting edges to 1. + vertex_one = edge[j][0]; + vertex_two = edge[j][1]; + adj_matrix[vertex_one][vertex_two] = 1; + } + + return 1; +} + +// This function linearizes parameter adj_matrix and stores in linear_adj_matrix. +void linearize(std::vector> & adj_matrix, std::vector & linear_adj_matrix, int num_vertex) +{ + linear_adj_matrix.resize(num_vertex * num_vertex); + for (int i = 0; i < num_vertex; i++) { + for (int j = 0; j < num_vertex; j++) { + linear_adj_matrix[i * num_vertex + j] = adj_matrix[i][j]; + } + } +} + +void print_adj_matrix(std::vector> & adj_matrix, int num_vertex) +{ + for (int i = 0; i < num_vertex; i++) { + for (int j = 0; j < num_vertex; j++) { + std::cout << adj_matrix[i][j] << " "; + } + std::cout << std::endl; + } +} + +void print_linear_adj_matrix(std::vector & linear_adj_matrix, int num_vertex) +{ + for (int i = 0; i < num_vertex * num_vertex; i++) { + std::cout << linear_adj_matrix[i] << " "; + if ((i+1)%num_vertex==0) { + std::cout << std::endl; + } + } +} \ No newline at end of file diff --git a/transitive_closure/graph.h b/transitive_closure/graph.h new file mode 100644 index 0000000..0b3884e --- /dev/null +++ b/transitive_closure/graph.h @@ -0,0 +1,41 @@ +#ifndef GRAPH_H +#define GRAPH_H + +#include +#include + +int generate_simple_directed_linear( + std::vector & graph, + int edges, + int vertices + ); + +int generate_simple_directed_linear( + int *graph, + int edges, + int vertices + ); + +int generate_simple_directed_graph( + std::vector> & adj_matrix, + int num_edge, + int num_vertex + ); + +void linearize( + std::vector> & adj_matrix, + std::vector & linear_adj_matrix, + int num_vertex + ); + +void print_adj_matrix( + std::vector> & adj_matrix, + int num_vertex + ); + +void print_linear_adj_matrix( + std::vector & linear_adj_matrix, + int num_vertex + ); + +#endif \ No newline at end of file diff --git a/transitive_closure/main.cpp 
b/transitive_closure/main.cpp new file mode 100644 index 0000000..58e8769 --- /dev/null +++ b/transitive_closure/main.cpp @@ -0,0 +1,377 @@ +// MAIN +// 1. Parse CLI. +// 2. Set up device. +// 3. Allocate USM memory. +// 4. Allocate Buffer memory. +// 5. Run sequential ? Use USM else use Buffer. +// 6. Run parallel ? use USM else use Buffer. +// +// +// For buffer execution: +// 1. Allocate and initialize graph vector on host. +// 2. Shove graph vector into sycl buffer C. +// 3. Copy sycl buffer C to sycl buffer A (Warshall P1). +// 4. Do computation on sycl buffer A (Warshall P1). +// 5. Access sycl buffer A on host. +// 6. Complete. +// +// +// For USM execution: +// 1. Allocate and initialize graph pointer on host. +// 2. Copy contents of graph into USM C. +// 3. Copy USM C to USM A (Warshall P1). +// 4. Do computation on USM A (Warshall P2). +// 5. Access USM on host. +// 6. Complete. +// +// TODO: look moore into malloc_shared + +#include +#include + +#include +#include +#include +#include + +#include "timestamps.h" +#include "plf_nanotimer.h" +#include "graph.h" + +int verify_results(sycl::buffer &a_parallel, std::vector A_sequential, int vertices); +int verify_results(int *A_parallel, int *A_sequential, int vertices); +void warshall_parallel_p1(sycl::queue &q, sycl::buffer &a, std::vector &c, int vertices); +void warshall_parallel_p2(sycl::queue &q, sycl::buffer &a, int vertices); +void warshall_sequential_p1(std::vector &A, std::vector &C, int vertices); +void warshall_sequential_p2(std::vector &A, int vertices); +void warshall_parallel_p1(sycl::queue &q, int *A, int *C, int vertices); +void warshall_parallel_p2(sycl::queue &q, int *A, int vertices); +void warshall_sequential_p1(int *A, int *C, int vertices); +void warshall_sequential_p2(int *A, int vertices); + + +int main(const int argc, const char *const argv[]) +{ + // Main declarations. + std::vector> timestamps; + double time_result; + + // Default values. 
+ int edges{380}; + int vertices{200}; + bool run_sequential{false}; + bool run_parallel{false}; + bool run_cpuonly{false}; + bool use_usm{false}; + bool print{false}; + + // CLI setup and parse. + CLI::App app{"Transitive Closure"}; + app.option_defaults()->always_capture_default(true); + app.add_option("-e, --edges", edges); + app.add_option("-v, --vertices", vertices); + app.add_flag("-s, --sequential", run_sequential); + app.add_flag("-p, --parallel", run_parallel); + app.add_flag("-c, --cpu-only", run_cpuonly); + app.add_flag("-u, --use-usm", use_usm); + CLI11_PARSE(app, argc, argv); + + plf::nanotimer time_total; + time_total.start(); + + // Sycl queue creation + spdlog::info("setting up queue"); + plf::nanotimer time_queue_creation; + time_queue_creation.start(); + sycl::device device{run_cpuonly ? sycl::cpu_selector_v : sycl::default_selector_v}; + sycl::queue q{device, dpc_common::exception_handler}; + std::string device_name = q.get_device().get_info(); + uint device_max_compute_units = q.get_device().get_info(); + spdlog::info("device name: {}, device max compute units: {}", device_name, device_max_compute_units); + time_result = time_queue_creation.get_elapsed_ns(); + mark_time(timestamps, time_result, "Sycl queue creation"); + + + + // Allocate memory using USM and pointers + plf::nanotimer time_memory_alloc_usm; + time_memory_alloc_usm.start(); + int *graph_usm = sycl::malloc_shared(vertices * vertices, q); + int *sequential_usm = sycl::malloc_shared(vertices * vertices, q); + int *parallel_usm = sycl::malloc_shared(vertices * vertices, q); + + if ((graph_usm == nullptr) || (sequential_usm == nullptr) || + (parallel_usm == nullptr)) { + if (graph_usm != nullptr) free(graph_usm, q); + if (sequential_usm != nullptr) free(sequential_usm, q); + if (parallel_usm != nullptr) free(parallel_usm, q); + + spdlog::error("USM memory allocation failure"); + return -1; + } + time_result = time_memory_alloc_usm.get_elapsed_ns(); + mark_time(timestamps, time_result, 
"USM memory allocation"); + + plf::nanotimer time_init_buffer; + time_init_buffer.start(); + + if (!generate_simple_directed_linear(graph_usm, edges, vertices)) { + spdlog::info("(USM) Failed to generate simple directed graph with edges: {} and vertices: {}", edges, vertices); + if (graph_usm != nullptr) free(graph_usm, q); + if (sequential_usm != nullptr) free(sequential_usm, q); + if (parallel_usm != nullptr) free(parallel_usm, q); + return -1; + } + time_result = time_init_buffer.get_elapsed_ns(); + mark_time(timestamps, time_result, "USM graph initialization"); + + + + // Allocate memory for parallel and sequential execution. + plf::nanotimer time_memory_alloc; + time_memory_alloc.start(); + spdlog::info("Allocating memory for graph data."); + std::vector graph_vec(vertices * vertices); + spdlog::info("Allocating memory for sequential result."); + std::vector sequential_vec(vertices * vertices); + spdlog::info("Allocating memory for parallel result."); + sycl::buffer parallel_buf(sycl::range(vertices, vertices)); + time_result = time_memory_alloc.get_elapsed_ns(); + mark_time(timestamps, time_result, "Buffer memory allocation"); + + // Initialize adjacency matrix. + spdlog::info("Generating simple directed graph with edges: {} and vertices: {}", edges, vertices); + if (!generate_simple_directed_linear(graph_vec, edges, vertices)) { + spdlog::info("(Buffer) Failed to generate simple directed graph with edges: {} and vertices: {}", edges, vertices); + return 1; + } + + if (run_sequential) { + plf::nanotimer time_sequential; + if (use_usm) { + // TODO implement overload function for + spdlog::info("Warshall sequential with USM begin"); + time_sequential.start(); + warshall_sequential_p1(sequential_usm, graph_usm, vertices); + warshall_sequential_p2(sequential_usm, vertices); + time_result = time_sequential.get_elapsed_ns(); + mark_time(timestamps, time_result, "Warshall USM sequential"); + spdlog::info("Warshall USM sequential end"); + } + else { // Use buffers. 
+ spdlog::info("Warshall sequential with buffers begin"); + time_sequential.start(); + warshall_sequential_p1(sequential_vec, graph_vec, vertices); + warshall_sequential_p2(sequential_vec, vertices); + time_result = time_sequential.get_elapsed_ns(); + mark_time(timestamps, time_result, "Warshall sequential"); + spdlog::info("Warshall sequential end"); + } + } + + if (run_parallel) { + plf::nanotimer time_parallel; + if (use_usm) { + //TODO - implement overloaded function for warshall parallel w/ pointers. + spdlog::info("Warshall parallel with USM begin"); + time_parallel.start(); + warshall_parallel_p1(q, parallel_usm, graph_usm, vertices); + warshall_parallel_p2(q, parallel_usm, vertices); + time_result = time_parallel.get_elapsed_ns(); + mark_time(timestamps, time_result, "Warshall USM parallel"); + spdlog::info("Warshall USM parallel end"); + } + else { // Use buffers. + spdlog::info("Warshall parallel with Buffers begin"); + time_parallel.start(); + warshall_parallel_p1(q, parallel_buf, graph_vec, vertices); + warshall_parallel_p2(q, parallel_buf, vertices); + const sycl::host_accessor trigger_work{parallel_buf}; + time_result = time_parallel.get_elapsed_ns(); + mark_time(timestamps, time_result, "Warshall Buffers parallel"); + spdlog::info("Warshall Buffers parallel end"); + } + } + + if (run_sequential && run_parallel) { + spdlog::info("Verify results begin"); + plf::nanotimer time_verify; + time_verify.start(); + if (use_usm) { + if (!verify_results(parallel_usm, sequential_usm, vertices)) { + spdlog::warn("Results do not match!"); + } + else { + spdlog::info("Results match!"); + } + } + else { + if (!verify_results(parallel_buf, sequential_vec, vertices)) { + spdlog::warn("Results do not match!"); + } + else { + spdlog::info("Results match!"); + } + } + time_result = time_verify.get_elapsed_ns(); + mark_time(timestamps, time_result, "Verify results"); + spdlog::info("Verify results end"); + } + + if (graph_usm != nullptr) { free(graph_usm, q); } + if 
(sequential_usm != nullptr) { free(sequential_usm, q); } + if (parallel_usm != nullptr) { free(parallel_usm, q); } + + time_result = time_total.get_elapsed_ns(); + mark_time(timestamps, time_result, "Total time"); + print_timestamps(timestamps); + spdlog::info("All done"); + return 0; +} + +// Function compares sequential and parallel result. +int verify_results(sycl::buffer &a_parallel, std::vector A_sequential, int vertices) +{ + const sycl::host_accessor A_parallel{a_parallel}; + for (int i = 0; i < vertices; i++) { + for (int j = 0; j < vertices; j++) { + if (A_parallel[i][j] != A_sequential[i * vertices + j]) { + return -1; + } + } + } + return 1; +} + +int verify_results(int *A_parallel, int *A_sequential, int vertices) +{ + for (int i = 0; i < vertices; i++) { + for (int j = 0; j < vertices; j++) { + if (A_parallel[i * vertices + j] != A_sequential[i * vertices + j]) { + return -1; + } + } + } + return 1; +} + +// Warshall procedure +// Step 1: copy adjacency matrix C into matrix A +// for i := 1 to n do +// for j := 1 to n do +// A[i, j] := C[i, j] +// +// +// Parallel part 1 w/ Buffers. +void warshall_parallel_p1(sycl::queue &q, sycl::buffer &a, std::vector &c, int vertices) +{ + sycl::buffer c_buf(c.data(), sycl::range<1>{c.size()}); + q.submit([&](auto &h) { + const sycl::accessor C(c_buf, h, sycl::read_only); + const sycl::accessor A(a, h, sycl::write_only); + + h.parallel_for(sycl::range(vertices, vertices), [=](auto index) { + int i = index[0]; + int j = index[1]; + A[i][j] = C[i * vertices + j]; + }); + }); +} + +// Parallel part 1 w/ USM. +//TODO +void warshall_parallel_p1(sycl::queue &q, int *A, int *C, int vertices) +{ + q.submit([&](auto &h) { + h.parallel_for(sycl::range(vertices * vertices), [=](auto index) { + A[index] = C[index]; + }); + }); +} + +// Parallel part 2 w/ USM. 
+//TODO +void warshall_parallel_p2(sycl::queue &q, int *A, int vertices) +{ + for (int k = 0; k < vertices; k++) { + q.submit([&](auto &h) { + h.parallel_for(sycl::range(vertices * vertices), [=](auto index) { + int i = index / vertices; + int j = index % vertices; + A[i * vertices + j] = A[i * vertices + j] || A[i * vertices + k] && A[k * vertices + j]; + }); + }); + } +} + +// Warshall procedure +// Step 2: compute the transitive closure of C as A +// +// for k := 1 to n do +// for i := 1 to n do +// for j := 1 to n do +// if A[i, j] = false then +// A[i, j] := A[i, k] and A[k, j] +// +// note: A[index] = A[i][j] +// +// +// Parallel part 2 w/ Buffers. +void warshall_parallel_p2(sycl::queue &q, sycl::buffer &a, int vertices) +{ + for (int k = 0; k < vertices; k++) { + q.submit([&](auto &h) { + const sycl::accessor A(a, h, sycl::read_write); + + h.parallel_for(sycl::range(vertices, vertices), [=](auto index) { + int i = index[0]; + int j = index[1]; + A[i][j] = A[i][j] || A[i][k] && A[k][j]; + }); + }); + } +} + +// Sequential part 1 w/ Buffers. +void warshall_sequential_p1(std::vector &A, std::vector &C, int vertices) +{ + for (int i = 0; i < vertices; i++) { + for (int j = 0; j < vertices; j++) { + A[i * vertices + j] = C[i * vertices + j]; + } + } +} + +// Sequential part 1 w/ USM. +void warshall_sequential_p1(int *A, int *C, int vertices) +{ + for (int i = 0; i < vertices; i++) { + for (int j = 0; j < vertices; j++) { + A[i * vertices + j] = C[i * vertices + j]; + } + } +} + +// Sequential part 2 w/ Buffers. +void warshall_sequential_p2(std::vector &A, int vertices) +{ + for (int k = 0; k < vertices; k++) { + for (int i = 0; i < vertices; i++) { + for (int j = 0; j < vertices; j++) { + A[i * vertices + j] = A[i * vertices + j] || A[i * vertices + k] && A[k * vertices + j]; + } + } + } +} + +// Sequential part 2 w/ USM. 
/// Sequential part 2 w/ USM: computes the transitive closure of the row-major
/// `vertices` x `vertices` matrix `A` in place with Warshall's procedure.
///
/// For every pivot k, each cell becomes A[i][j] || (A[i][k] && A[k][j]); every
/// cell is therefore rewritten as 0 or 1. A null pointer or a non-positive
/// vertex count is treated as "nothing to do".
///
/// @param A        Matrix of at least vertices * vertices ints, updated in place.
/// @param vertices Side length of the matrix.
void warshall_sequential_p2(int *A, int vertices)
{
    if (A == nullptr || vertices <= 0) {
        return;
    }

    // 64-bit offsets: i * vertices + j overflows int once vertices exceeds 46340.
    const long long n = vertices;

    for (long long k = 0; k < n; ++k) {
        const int *pivot_row = A + k * n;
        for (long long i = 0; i < n; ++i) {
            int *row = A + i * n;
            // Step k only ever rewrites row[k] with its own truth value, so it
            // can be read once for the whole row.
            const bool reaches_pivot = row[k] != 0;
            for (long long j = 0; j < n; ++j) {
                row[j] = (row[j] != 0) || (reaches_pivot && (pivot_row[j] != 0));
            }
        }
    }
}