diff --git a/transitive_closure/CMakeLists.txt b/transitive_closure/CMakeLists.txt
index ebf5b0f..a410232 100644
--- a/transitive_closure/CMakeLists.txt
+++ b/transitive_closure/CMakeLists.txt
@@ -1,2 +1,2 @@
-add_executable(transitive_closure transitive_closure.cpp timestamps.cpp)
+add_executable(transitive_closure main.cpp timestamps.cpp graph.cpp)
 target_link_libraries(transitive_closure fmt::fmt spdlog::spdlog CLI11::CLI11)
\ No newline at end of file
diff --git a/transitive_closure/graph.cpp b/transitive_closure/graph.cpp
new file mode 100644
index 0000000..9ea7b48
--- /dev/null
+++ b/transitive_closure/graph.cpp
@@ -0,0 +1,192 @@
+// Inspired by: https://www.geeksforgeeks.org/how-to-create-a-random-graph-in-c/
+
+// Why to linearize: https://community.khronos.org/t/matrix-multiplication-using-2d-std-vector/106457#:~:text=On%20a%20more,be%20known%20statically.
+
+#include <stdio.h>
+#include <spdlog/spdlog.h>
+#include <spdlog/sinks/stdout_color_sinks.h>
+#include "graph.h"
+
+// Generates a random simple directed graph (no self-loops, no duplicate edges) and marks its
+// edges with 1 in `graph`, a row-major vertices x vertices adjacency matrix stored in a 1-D
+// vector. Entries that are not selected are left untouched; resizing graph is done in main.
+// Returns 1 on success and -1 (never 0) on invalid input: test the result against 1, not with `!`.
+int generate_simple_directed_linear(std::vector<int> & graph, int edges, int vertices)
+{
+    // Negative counts would produce negative matrix indices.
+    if (edges < 0 || vertices < 0) {
+        spdlog::warn("Edge and vertex counts must be non-negative.");
+        return -1;
+    }
+    // A simple directed graph holds at most n * (n - 1) edges. The bound is computed in 64
+    // bits so that a large vertex count cannot overflow int.
+    const long long cells = static_cast<long long>(vertices) * vertices;
+    if (edges > cells - vertices) {
+        spdlog::warn("Too many edges.");
+        return -1;
+    }
+    // Writing past the end of the vector would be undefined behaviour.
+    if (static_cast<long long>(graph.size()) < cells) {
+        spdlog::warn("Graph storage is smaller than vertices * vertices.");
+        return -1;
+    }
+
+    // Rejection sampling: redraw slot i until it holds a non-loop edge not seen before.
+    std::vector<std::vector<int>> edge(edges, std::vector<int>(2));
+    int i = 0;
+    while (i < edges) {
+        edge[i][0] = rand() % vertices;
+        edge[i][1] = rand() % vertices;
+        if (edge[i][0] == edge[i][1]) {
+            continue;  // Discard loop.
+        }
+        bool duplicate = false;
+        for (int j = 0; j < i && !duplicate; j++) {
+            duplicate = edge[i][0] == edge[j][0] && edge[i][1] == edge[j][1];
+        }
+        if (!duplicate) { i++; }  // A duplicate leaves i unchanged and is redrawn.
+    }
+
+    // Mark each generated edge in the row-major adjacency matrix.
+    for (int j = 0; j < edges; j++) {
+        graph[static_cast<std::size_t>(edge[j][0]) * vertices + edge[j][1]] = 1;
+    }
+
+    return 1;
+}
+
+// Pointer overload: marks `edges` random edges of a simple directed graph (no self-loops, no
+// duplicate edges) with 1 in `graph`, a caller-allocated row-major array of vertices * vertices
+// ints. Entries that are not selected are left untouched, so the caller must zero the array.
+// Returns 1 on success and -1 (never 0) on invalid input: test the result against 1, not with `!`.
+int generate_simple_directed_linear(int *graph, int edges, int vertices)
+{
+    // Negative counts would produce negative matrix indices.
+    if (edges < 0 || vertices < 0) {
+        spdlog::warn("Edge and vertex counts must be non-negative.");
+        return -1;
+    }
+    // A simple directed graph holds at most n * (n - 1) edges. The bound is computed in 64
+    // bits so that a large vertex count cannot overflow int.
+    const long long max_edges = static_cast<long long>(vertices) * (vertices - 1);
+    if (edges > max_edges) {
+        spdlog::warn("Too many edges.");
+        return -1;
+    }
+
+    // Rejection sampling: redraw slot i until it holds a non-loop edge not seen before.
+    std::vector<std::vector<int>> edge(edges, std::vector<int>(2));
+    int i = 0;
+    while (i < edges) {
+        edge[i][0] = rand() % vertices;
+        edge[i][1] = rand() % vertices;
+        if (edge[i][0] == edge[i][1]) {
+            continue;  // Discard loop.
+        }
+        bool duplicate = false;
+        for (int j = 0; j < i && !duplicate; j++) {
+            duplicate = edge[i][0] == edge[j][0] && edge[i][1] == edge[j][1];
+        }
+        if (!duplicate) {
+            i++;  // Keep the edge; a duplicate leaves i unchanged and is redrawn.
+        }
+    }
+
+    // Mark each generated edge in the row-major adjacency matrix; the index is computed in 64
+    // bits for the same overflow reason as above.
+    for (int j = 0; j < edges; j++) {
+        graph[static_cast<long long>(edge[j][0]) * vertices + edge[j][1]] = 1;
+    }
+
+    return 1;
+}
+
+
+
+
+
+
+// #### DEPRECATED #####
+//
+// This function generates a directed graph that has no loops and no duplicate edges. The resulting
+// graph is stored in a 2-D vector: adj_matrix[from][to] is set to 1 for every generated edge.
+// Returns 1 on success and -1 (never 0) when the request is invalid.
+int generate_simple_directed_graph(std::vector<std::vector<int>> & adj_matrix, int num_edge, int num_vertex)
+{
+    // Negative counts would produce negative matrix indices.
+    if (num_edge < 0 || num_vertex < 0) {
+        std::cout << "Edge and vertex counts must be non-negative." << std::endl;
+        return -1;
+    }
+    // Maximum number of edges = n x (n - 1), computed in 64 bits so it cannot overflow int.
+    if (num_edge > static_cast<long long>(num_vertex) * (num_vertex - 1)) {
+        std::cout << "Too many edges." << std::endl;
+        return -1;
+    }
+    // Resizing adj_matrix (num_vertex by num_vertex) is done in main; a smaller matrix would be
+    // written out of bounds, so it is rejected here.
+    bool too_small = adj_matrix.size() < static_cast<std::size_t>(num_vertex);
+    for (int row = 0; row < num_vertex && !too_small; row++) {
+        too_small = adj_matrix[row].size() < static_cast<std::size_t>(num_vertex);
+    }
+    if (too_small) {
+        std::cout << "Adjacency matrix is smaller than num_vertex x num_vertex." << std::endl;
+        return -1;
+    }
+
+    // Heap storage instead of a variable-length array, which is not standard C++ and can
+    // overflow the stack for large edge counts.
+    std::vector<std::vector<int>> edge(num_edge, std::vector<int>(2));
+    int i = 0;
+    // Rejection sampling: redraw slot i until it holds a non-loop edge not seen before.
+    while (i < num_edge) {
+        edge[i][0] = rand() % num_vertex;
+        edge[i][1] = rand() % num_vertex;
+        if (edge[i][0] == edge[i][1]) {
+            continue;  // Discard loop.
+        }
+        bool duplicate = false;
+        for (int j = 0; j < i && !duplicate; j++) {
+            duplicate = edge[i][0] == edge[j][0] && edge[i][1] == edge[j][1];
+        }
+        if (!duplicate) { i++; }  // A duplicate leaves i unchanged and is redrawn.
+    }
+
+    // Set entries with connecting edges to 1.
+    for (int j = 0; j < num_edge; j++) {
+        adj_matrix[edge[j][0]][edge[j][1]] = 1;
+    }
+
+    return 1;
+}
+
+// Copies adj_matrix into linear_adj_matrix in row-major order ([i][j] -> i * num_vertex + j).
+void linearize(std::vector<std::vector<int>> & adj_matrix, std::vector<int> & linear_adj_matrix, int num_vertex)
+{
+    linear_adj_matrix.resize(num_vertex * num_vertex);
+    for (int row = 0, flat = 0; row < num_vertex; ++row) {  // flat: next destination slot
+        for (int col = 0; col < num_vertex; ++col, ++flat) {
+            linear_adj_matrix[flat] = adj_matrix[row][col];
+        }
+    }
+}
+
+void print_adj_matrix(std::vector<std::vector<int>> & adj_matrix, int num_vertex)
+{
+    // Prints num_vertex rows of num_vertex values; every value is followed by a single space.
+    for (int row = 0; row < num_vertex; ++row) {
+        const std::vector<int> &cells = adj_matrix[row];
+        for (int col = 0; col < num_vertex; ++col) { std::cout << cells[col] << " "; }
+        std::cout << std::endl;
+    }
+}
+
+void print_linear_adj_matrix(std::vector<int> & linear_adj_matrix, int num_vertex)
+{
+    // Every value is followed by a space; a line break closes each run of num_vertex values.
+    const int total = num_vertex * num_vertex;
+    for (int printed = 0; printed < total; ) {
+        std::cout << linear_adj_matrix[printed++] << " ";
+        if (printed % num_vertex == 0) { std::cout << std::endl; }
+    }
+}
\ No newline at end of file
diff --git a/transitive_closure/graph.h b/transitive_closure/graph.h
new file mode 100644
index 0000000..0b3884e
--- /dev/null
+++ b/transitive_closure/graph.h
@@ -0,0 +1,41 @@
+#ifndef GRAPH_H
+#define GRAPH_H
+
+#include <vector>
+#include <iostream>
+// Marks `edges` random edges (no self-loops, no duplicates) with 1 in a row-major 1-D matrix.
+int generate_simple_directed_linear(
+    std::vector<int> & graph,  // written at index from * vertices + to
+    int edges,
+    int vertices
+    );  // returns 1 on success, -1 on failure
+// Same as above for a caller-allocated array instead of a vector.
+int generate_simple_directed_linear(
+    int *graph,
+    int edges,
+    int vertices
+    );
+// Deprecated 2-D variant: sets adj_matrix[from][to] to 1 for each generated edge; returns 1 or -1.
+int generate_simple_directed_graph(
+    std::vector<std::vector<int>> & adj_matrix,
+    int num_edge,
+    int num_vertex
+    );
+// Copies the 2-D matrix into linear_adj_matrix in row-major order (the output is resized).
+void linearize(
+    std::vector<std::vector<int>> & adj_matrix,
+    std::vector<int> & linear_adj_matrix,
+    int num_vertex
+    );
+// Prints the 2-D matrix to std::cout, one row per line.
+void print_adj_matrix(
+    std::vector<std::vector<int>> & adj_matrix,
+    int num_vertex
+    );
+// Prints the flattened matrix to std::cout with a line break after every num_vertex values.
+void print_linear_adj_matrix(
+    std::vector<int> & linear_adj_matrix,
+    int num_vertex
+    );
+
+#endif
\ No newline at end of file
diff --git a/transitive_closure/main.cpp b/transitive_closure/main.cpp
new file mode 100644
index 0000000..58e8769
--- /dev/null
+++ b/transitive_closure/main.cpp
@@ -0,0 +1,377 @@
+// MAIN
+// 1. Parse CLI.
+// 2. Set up device.
+// 3. Allocate USM memory.
+// 4. Allocate Buffer memory.
+// 5. Run sequential ? Use USM else use Buffer.
+// 6. Run parallel ? use USM else use Buffer.
+//
+//
+// For buffer execution:
+// 1. Allocate and initialize graph vector on host.
+// 2. Shove graph vector into sycl buffer C.
+// 3. Copy sycl buffer C to sycl buffer A (Warshall P1).
+// 4. Do computation on sycl buffer A (Warshall P2).
+// 5. Access sycl buffer A on host.
+// 6. Complete.
+//
+//
+// For USM execution:
+// 1. Allocate and initialize graph pointer on host.
+// 2. Copy contents of graph into USM C.
+// 3. Copy USM C to USM A (Warshall P1).
+// 4. Do computation on USM A (Warshall P2).
+// 5. Access USM on host.
+// 6. Complete.
+//
+// TODO: look more into malloc_shared
+
+#include <sycl/sycl.hpp>
+#include <dpc_common.hpp>
+
+#include <CLI/CLI.hpp>
+#include <fmt/format.h>
+#include <spdlog/spdlog.h>
+#include <spdlog/sinks/stdout_color_sinks.h>
+
+#include "timestamps.h"
+#include "plf_nanotimer.h"
+#include "graph.h"
+
+int verify_results(sycl::buffer<int, 2> &a_parallel, std::vector<int> A_sequential, int vertices);
+int verify_results(int *A_parallel, int *A_sequential, int vertices);
+void warshall_parallel_p1(sycl::queue &q, sycl::buffer<int, 2> &a, std::vector<int> &c, int vertices);
+void warshall_parallel_p2(sycl::queue &q, sycl::buffer<int, 2> &a, int vertices);
+void warshall_sequential_p1(std::vector<int> &A, std::vector<int> &C, int vertices);
+void warshall_sequential_p2(std::vector<int> &A, int vertices);
+void warshall_parallel_p1(sycl::queue &q, int *A, int *C, int vertices);
+void warshall_parallel_p2(sycl::queue &q, int *A, int vertices);
+void warshall_sequential_p1(int *A, int *C, int vertices);
+void warshall_sequential_p2(int *A, int vertices);
+
+
+int main(const int argc, const char *const argv[])
+{
+    // Main declarations.
+    std::vector<std::tuple<std::string, double>> timestamps;
+    double time_result;
+
+    // Default values.
+    int edges{380};
+    int vertices{200};
+    bool run_sequential{false};
+    bool run_parallel{false};
+    bool run_cpuonly{false};
+    bool use_usm{false};
+
+    // CLI setup and parse.
+    CLI::App app{"Transitive Closure"};
+    app.option_defaults()->always_capture_default(true);
+    app.add_option("-e, --edges", edges);
+    app.add_option("-v, --vertices", vertices);
+    app.add_flag("-s, --sequential", run_sequential);
+    app.add_flag("-p, --parallel", run_parallel);
+    app.add_flag("-c, --cpu-only", run_cpuonly);
+    app.add_flag("-u, --use-usm", use_usm);
+    CLI11_PARSE(app, argc, argv);
+
+    // Negative sizes would wrap around in the allocations below.
+    if (edges < 0 || vertices < 0) {
+        spdlog::error("edges and vertices must be non-negative");
+        return 1;
+    }
+    // Matrix size in size_t so that vertices * vertices cannot overflow int.
+    const std::size_t cells = static_cast<std::size_t>(vertices) * vertices;
+
+    plf::nanotimer time_total;
+    time_total.start();
+
+    // Sycl queue creation. The queue is in-order because USM kernels carry no implicit
+    // dependencies, and every Warshall step must see the result of the previous one.
+    spdlog::info("setting up queue");
+    plf::nanotimer time_queue_creation;
+    time_queue_creation.start();
+    sycl::device device{run_cpuonly ? sycl::cpu_selector_v : sycl::default_selector_v};
+    sycl::queue q{device, dpc_common::exception_handler, sycl::property::queue::in_order()};
+    std::string device_name = q.get_device().get_info<sycl::info::device::name>();
+    const auto device_max_compute_units = q.get_device().get_info<sycl::info::device::max_compute_units>();
+    spdlog::info("device name: {}, device max compute units: {}", device_name, device_max_compute_units);
+    time_result = time_queue_creation.get_elapsed_ns();
+    mark_time(timestamps, time_result, "Sycl queue creation");
+
+    // Allocate memory using USM and pointers
+    plf::nanotimer time_memory_alloc_usm;
+    time_memory_alloc_usm.start();
+    int *graph_usm = sycl::malloc_shared<int>(cells, q);
+    int *sequential_usm = sycl::malloc_shared<int>(cells, q);
+    int *parallel_usm = sycl::malloc_shared<int>(cells, q);
+
+    // Frees whichever USM allocations succeeded; called on every exit path below.
+    const auto release_usm = [&]() {
+        if (graph_usm != nullptr) { sycl::free(graph_usm, q); }
+        if (sequential_usm != nullptr) { sycl::free(sequential_usm, q); }
+        if (parallel_usm != nullptr) { sycl::free(parallel_usm, q); }
+    };
+
+    if ((graph_usm == nullptr) || (sequential_usm == nullptr) || (parallel_usm == nullptr)) {
+        release_usm();
+        spdlog::error("USM memory allocation failure");
+        return -1;
+    }
+    time_result = time_memory_alloc_usm.get_elapsed_ns();
+    mark_time(timestamps, time_result, "USM memory allocation");
+
+    plf::nanotimer time_init_buffer;
+    time_init_buffer.start();
+    // malloc_shared does not zero the allocation and the generator only writes the 1 entries.
+    q.memset(graph_usm, 0, cells * sizeof(int)).wait();
+    // The generator reports failure as -1 (never 0), so compare against its success value.
+    if (generate_simple_directed_linear(graph_usm, edges, vertices) != 1) {
+        spdlog::info("(USM) Failed to generate simple directed graph with edges: {} and vertices: {}", edges, vertices);
+        release_usm();
+        return -1;
+    }
+    time_result = time_init_buffer.get_elapsed_ns();
+    mark_time(timestamps, time_result, "USM graph initialization");
+
+    // Allocate memory for parallel and sequential execution.
+    plf::nanotimer time_memory_alloc;
+    time_memory_alloc.start();
+    spdlog::info("Allocating memory for graph data.");
+    std::vector<int> graph_vec(cells);
+    spdlog::info("Allocating memory for sequential result.");
+    std::vector<int> sequential_vec(cells);
+    spdlog::info("Allocating memory for parallel result.");
+    sycl::buffer<int, 2> parallel_buf(sycl::range(vertices, vertices));
+    time_result = time_memory_alloc.get_elapsed_ns();
+    mark_time(timestamps, time_result, "Buffer memory allocation");
+
+    // Initialize adjacency matrix.
+    spdlog::info("Generating simple directed graph with edges: {} and vertices: {}", edges, vertices);
+    if (generate_simple_directed_linear(graph_vec, edges, vertices) != 1) {
+        spdlog::info("(Buffer) Failed to generate simple directed graph with edges: {} and vertices: {}", edges, vertices);
+        release_usm();
+        return 1;
+    }
+
+    if (run_sequential) {
+        plf::nanotimer time_sequential;
+        if (use_usm) {
+            spdlog::info("Warshall sequential with USM begin");
+            time_sequential.start();
+            warshall_sequential_p1(sequential_usm, graph_usm, vertices);
+            warshall_sequential_p2(sequential_usm, vertices);
+            time_result = time_sequential.get_elapsed_ns();
+            mark_time(timestamps, time_result, "Warshall USM sequential");
+            spdlog::info("Warshall USM sequential end");
+        }
+        else { // Use buffers.
+            spdlog::info("Warshall sequential with buffers begin");
+            time_sequential.start();
+            warshall_sequential_p1(sequential_vec, graph_vec, vertices);
+            warshall_sequential_p2(sequential_vec, vertices);
+            time_result = time_sequential.get_elapsed_ns();
+            mark_time(timestamps, time_result, "Warshall sequential");
+            spdlog::info("Warshall sequential end");
+        }
+    }
+
+    if (run_parallel) {
+        plf::nanotimer time_parallel;
+        if (use_usm) {
+            spdlog::info("Warshall parallel with USM begin");
+            time_parallel.start();
+            warshall_parallel_p1(q, parallel_usm, graph_usm, vertices);
+            warshall_parallel_p2(q, parallel_usm, vertices);
+            // Kernel submission is asynchronous: wait before timing and before the host reads results.
+            q.wait();
+            time_result = time_parallel.get_elapsed_ns();
+            mark_time(timestamps, time_result, "Warshall USM parallel");
+            spdlog::info("Warshall USM parallel end");
+        }
+        else { // Use buffers.
+            spdlog::info("Warshall parallel with Buffers begin");
+            time_parallel.start();
+            warshall_parallel_p1(q, parallel_buf, graph_vec, vertices);
+            warshall_parallel_p2(q, parallel_buf, vertices);
+            // The host accessor makes the host wait for the kernels that write parallel_buf.
+            const sycl::host_accessor trigger_work{parallel_buf};
+            time_result = time_parallel.get_elapsed_ns();
+            mark_time(timestamps, time_result, "Warshall Buffers parallel");
+            spdlog::info("Warshall Buffers parallel end");
+        }
+    }
+
+    if (run_sequential && run_parallel) {
+        spdlog::info("Verify results begin");
+        plf::nanotimer time_verify;
+        time_verify.start();
+        // verify_results returns 1 on a match and -1 (never 0) on a mismatch.
+        const int verdict = use_usm ? verify_results(parallel_usm, sequential_usm, vertices)
+                                    : verify_results(parallel_buf, sequential_vec, vertices);
+        if (verdict != 1) {
+            spdlog::warn("Results do not match!");
+        }
+        else {
+            spdlog::info("Results match!");
+        }
+        time_result = time_verify.get_elapsed_ns();
+        mark_time(timestamps, time_result, "Verify results");
+        spdlog::info("Verify results end");
+    }
+
+    release_usm();
+
+    time_result = time_total.get_elapsed_ns();
+    mark_time(timestamps, time_result, "Total time");
+    print_timestamps(timestamps);
+    spdlog::info("All done");
+    return 0;
+}
+
+// Compares the parallel result (SYCL buffer) with the sequential result (row-major vector).
+// Returns 1 when all entries agree and -1 (never 0) at the first difference.
+int verify_results(sycl::buffer<int, 2> &a_parallel, std::vector<int> A_sequential, int vertices)
+{
+    const sycl::host_accessor parallel_view{a_parallel};
+    for (int row = 0; row < vertices; ++row) {
+        const int offset = row * vertices;
+        for (int col = 0; col < vertices; ++col) {
+            if (parallel_view[row][col] != A_sequential[offset + col]) { return -1; }
+        }
+    }
+    return 1;
+}
+
+int verify_results(int *A_parallel, int *A_sequential, int vertices)
+{
+    // Returns 1 when both row-major matrices agree and -1 (never 0) at the first difference.
+    for (int row = 0; row < vertices; ++row) {
+        const int offset = row * vertices;
+        for (int col = 0; col < vertices; ++col) {
+            if (A_parallel[offset + col] != A_sequential[offset + col]) { return -1; }
+        }
+    }
+    return 1;
+}
+
+// Warshall procedure
+// Step 1: copy adjacency matrix C into matrix A
+// for i := 1 to n do 
+//   for j := 1 to n do
+//     A[i, j] := C[i, j]
+//
+// Parallel part 1 w/ Buffers: `c` is the row-major vertices x vertices input on the host and
+// `a` is the 2-D buffer that receives the copy.
+void warshall_parallel_p1(sycl::queue &q, sycl::buffer<int, 2> &a, std::vector<int> &c, int vertices)
+{
+    sycl::buffer<int> c_buf(c.data(), sycl::range<1>{c.size()});
+    q.submit([&](auto &h) {
+        const sycl::accessor C(c_buf, h, sycl::read_only);
+        const sycl::accessor A(a, h, sycl::write_only);
+        // One work-item per matrix entry (i, j).
+        h.parallel_for(sycl::range(vertices, vertices), [=](auto index) {
+            int i = index[0];
+            int j = index[1];
+            A[i][j] = C[i * vertices + j];
+        });
+    });
+}
+
+// Parallel part 1 w/ USM: copies the vertices * vertices entries of C into A on the device.
+// USM kernels carry no implicit dependencies, so the copy is waited for before returning.
+void warshall_parallel_p1(sycl::queue &q, int *A, int *C, int vertices)
+{
+    q.submit([&](auto &h) {
+        h.parallel_for(sycl::range(vertices * vertices), [=](auto index) {
+            A[index] = C[index];
+        });
+    }).wait();
+}
+
+// Parallel part 2 w/ USM: Warshall's k loop runs on the host, one kernel per k. Step k reads what
+// step k - 1 wrote and USM kernels carry no implicit dependencies, so each kernel is waited for.
+void warshall_parallel_p2(sycl::queue &q, int *A, int vertices)
+{
+    for (int k = 0; k < vertices; k++) {
+        q.submit([&](auto &h) {
+            h.parallel_for(sycl::range(vertices * vertices), [=](auto index) {
+                int i = index / vertices;
+                int j = index % vertices;
+                A[i * vertices + j] = A[i * vertices + j] || (A[i * vertices + k] && A[k * vertices + j]);
+            });
+        }).wait();
+    }
+}
+
+// Warshall procedure
+// Step 2: compute the transitive closure of C as A
+//
+// for k := 1 to n do
+//   for i := 1 to n do
+//     for j := 1 to n do
+//       if A[i, j] = false then
+//         A[i, j] := A[i, k] and A[k, j]
+//
+// Parallel part 2 w/ Buffers: the k loop stays on the host and submits one kernel per k;
+// within a kernel each work-item (i, j) updates A[i][j] from A[i][k] and A[k][j].
+// `&&` binds tighter than `||`, so the update reads A[i][j] || (A[i][k] && A[k][j]).
+// Every kernel requests read_write access to the same buffer `a`.
+void warshall_parallel_p2(sycl::queue &q, sycl::buffer<int, 2> &a, int vertices)
+{
+    for (int k = 0; k < vertices; k++) {
+        q.submit([&](auto &h) {
+            const sycl::accessor A(a, h, sycl::read_write);
+            // One work-item per matrix entry (i, j).
+            h.parallel_for(sycl::range(vertices, vertices), [=](auto index) {
+                int i = index[0];
+                int j = index[1];
+                A[i][j] = A[i][j] || A[i][k] && A[k][j];
+            });
+        });
+    }
+}
+
+// Sequential part 1 w/ Buffers: copies the row-major vertices x vertices matrix C into A,
+// one row at a time.
+void warshall_sequential_p1(std::vector<int> &A, std::vector<int> &C, int vertices)
+{
+    for (int row = 0; row < vertices; ++row) {
+        const int offset = row * vertices;
+        for (int col = 0; col < vertices; ++col) { A[offset + col] = C[offset + col]; }
+    }
+}
+
+// Sequential part 1 w/ USM: copies the row-major vertices x vertices matrix C into A,
+// one row at a time.
+void warshall_sequential_p1(int *A, int *C, int vertices)
+{
+    for (int row = 0; row < vertices; ++row) {
+        const int offset = row * vertices;
+        for (int col = 0; col < vertices; ++col) { A[offset + col] = C[offset + col]; }
+    }
+}
+
+// Sequential part 2 w/ Buffers: Warshall's algorithm in place on the row-major matrix A.
+void warshall_sequential_p2(std::vector<int> &A, int vertices)
+{
+    for (int via = 0; via < vertices; ++via) {
+        for (int row = 0; row < vertices; ++row) {
+            int *const cells = A.data() + row * vertices;
+            const int *const via_cells = A.data() + via * vertices;
+            for (int col = 0; col < vertices; ++col) { cells[col] = cells[col] || (cells[via] && via_cells[col]); }
+        }
+    }
+}
+
+// Sequential part 2 w/ USM: Warshall's algorithm in place on the row-major matrix A.
+void warshall_sequential_p2(int *A, int vertices)
+{
+    for (int via = 0; via < vertices; ++via) {
+        for (int row = 0; row < vertices; ++row) {
+            int *const cells = A + row * vertices;
+            const int *const via_cells = A + via * vertices;
+            for (int col = 0; col < vertices; ++col) { cells[col] = cells[col] || (cells[via] && via_cells[col]); }
+        }
+    }
+}
\ No newline at end of file
diff --git a/transitive_closure/test.txt b/transitive_closure/test.txt
deleted file mode 100644
index b1cfde7..0000000
--- a/transitive_closure/test.txt
+++ /dev/null
@@ -1 +0,0 @@
-4 4 1 1 0 1 0 1 1 0 0 0 1 1 0 0 0 1
\ No newline at end of file
diff --git a/transitive_closure/transitive_closure.cpp b/transitive_closure/transitive_closure.cpp
deleted file mode 100644
index dc01b0a..0000000
--- a/transitive_closure/transitive_closure.cpp
+++ /dev/null
@@ -1,261 +0,0 @@
-#include <sycl/sycl.hpp>
-#include <dpc_common.hpp>
-
-#include <CLI/CLI.hpp>
-#include <fmt/format.h>
-#include <spdlog/spdlog.h>
-#include <spdlog/sinks/stdout_color_sinks.h>
-
-#include "timestamps.h"
-#include "plf_nanotimer.h"
-
-int verify_matrix_squared(std::string & filename, int & n)
-{
-    int rows, columns;
-    std::ifstream input(filename);
-    
-    if (!input.is_open()) {
-        std::cout << "The file could not be opened.";
-        return 1;
-    }
-    
-    input >> rows >> columns;
-    n = rows;
-    
-    if (rows != columns) { return 1; }
-    
-    input.close();
-    return 0;
-}
-
-int populate_matrix(std::string & filename, sycl::buffer<int, 2> & C_buf)
-{
-    const sycl::host_accessor C(C_buf);
-    std::ifstream input(filename);
-    
-    if (!input.is_open()) {
-        std::cout << "The file could not be opened.";
-        return 1;
-    }
-    
-    int rows, columns;
-    input >> rows >> columns;
-    
-    if (rows != columns) { return 1; }
-    
-    for (int i = 0; i < rows; i++) {
-        for (int j = 0; j < columns; j++) {
-            input >> C[i][j];
-        }
-    }
-    
-    input.close();
-    return 0;
-    
-}
-
-void print_matrix(const sycl::host_accessor<int, 2> & A, int n)
-{
-    std::cout << "Transitive Closure A of C: " << std::endl;
-    for (int i = 0; i < n; i++) {
-        for (int j = 0; j < n; j++) {
-            std::cout << A[i][j] << " ";
-        }
-        std::cout << std::endl;
-    }
-}
-
-int main(const int argc, const char *const argv[])
-{
-    // main declarations::begin
-    std::string input; // file to adjacency matrix
-    int n; // squared matrix dimension
-    std::string device_name;
-    std::vector<std::tuple<std::string, double>> timestamps;
-    double time_result;
-    // main declarations::end
-    
-    // main inits::begin
-    bool run_sequentially{false};
-    bool run_cpuonly{false};
-    bool print{false};
-    // main inits::end
-    
-    // cli setup and parse::begin
-    CLI::App app{"Transitive Closure"};
-    app.option_defaults()->always_capture_default(true);
-    app.add_option("-i, --input", input);
-    app.add_flag("-s, --sequential", run_sequentially);
-    app.add_flag("-c, --cpu-only", run_cpuonly);
-    app.add_flag("-p, --print", print);
-    CLI11_PARSE(app, argc, argv);
-    // cli setup and parse::end
-    
-    plf::nanotimer time_total;
-    time_total.start();
-    
-    // get dimensions of adjacency matrix::begin
-    spdlog::info("attempting to verify square-ness of adjacency matrix using input: {}", input);
-    if (verify_matrix_squared(input, n) == 1) {
-        spdlog::error("error when verifying square-ness of adjacency matrix using input: {}", input);
-        return 1;
-    }
-    // get dimensions of adjacency matrix::end
-    
-    // declare sycl buffers::begin
-    plf::nanotimer time_buffer_dec;
-    time_buffer_dec.start();
-    sycl::buffer<int, 2> C_buf(sycl::range(n, n));
-    sycl::buffer<int, 2> A_buf(sycl::range(n, n));
-    time_result = time_buffer_dec.get_elapsed_ns();
-    mark_time(timestamps, time_result, "declare sycl buffers");
-    // declare sycl buffers::end
-    
-    // init sycl buffers::begin
-    plf::nanotimer time_buffer_init;
-    time_buffer_init.start();
-    spdlog::info("attempting to populate adjacency matrix using input: {}", input);
-    if (populate_matrix(input, C_buf) == 1) {
-        spdlog::error("error when populating adjacency matrix using input: {}", input);
-        return 1;
-    }
-    time_result = time_buffer_init.get_elapsed_ns();
-    mark_time(timestamps, time_result, "populate sycl buffer with adj matrix");
-    // init sycl buffers::end
-    
-    // run sequential::begin
-    if (run_sequentially) {
-        spdlog::info("starting sequential execution block");
-        plf::nanotimer time_sequential; // time sequential block::begin
-        time_sequential.start();
-        
-        // init host accessors::begin
-        spdlog::info("initializing host accessors");
-        plf::nanotimer time_init_host;
-        time_init_host.start();
-        const sycl::host_accessor C(C_buf);
-        const sycl::host_accessor A(A_buf);
-        time_result = time_init_host.get_elapsed_ns();
-        mark_time(timestamps, time_result, "host accessor creation");
-        // init host accessors::end
-        
-        // Warshall procedure step 1::begin
-        spdlog::info("starting Warshall procedure step 1");
-        plf::nanotimer time_warshall_p1;
-        time_warshall_p1.start();
-        for (int i = 0; i < n; i++) {
-            for (int j = 0; j < n; j++) {
-                A[i][j] = C[i][j];
-            }
-        }
-        time_result = time_warshall_p1.get_elapsed_ns();
-        mark_time(timestamps, time_result, "Warshall procedure step 1");
-        // Warshall procedure step 1::end
-        
-        // Warshall procedure step 2::begin
-        spdlog::info("starting Warshall procedure step 2");
-        plf::nanotimer time_warshall_p2;
-        time_warshall_p2.start();
-        for (int k = 0; k < n; k++) {
-            for (int i = 0; i < n; i++) {
-                for (int j = 0; j < n; j++) {
-                    A[i][j] = A[i][j] || A[i][k] * A[k][j];
-                }
-            }
-        }
-        time_result = time_warshall_p2.get_elapsed_ns();
-        mark_time(timestamps, time_result, "Warshall procedure step 2");
-        // Warshall procedure step 2::end
-        
-        time_result = time_sequential.get_elapsed_ns(); // time sequential block::end
-        mark_time(timestamps, time_result, "sequential block");
-        
-        if (print) {
-            print_matrix(A, n);
-        }
-    }
-    // run sequential::end
-    
-    // run parallel::begin
-    else {
-        spdlog::info("starting parallel execution block");
-        // flag buffer for timing parallel execution
-        spdlog::info("initializing flag");
-        plf::nanotimer time_flag_init;
-        time_flag_init.start();
-        sycl::buffer<int> flag_buf{sycl::range<1>{1}};
-        time_result = time_flag_init.get_elapsed_ns();
-        mark_time(timestamps, time_result, "init flag buf");
-        
-        // sycl queue creation::begin
-        spdlog::info("setting up queue");
-        plf::nanotimer time_device_init;
-        time_device_init.start();
-        sycl::device device{run_cpuonly ? sycl::cpu_selector_v : sycl::default_selector_v};
-        sycl::queue q{device, dpc_common::exception_handler, sycl::property::queue::in_order()};
-        device_name = q.get_device().get_info<sycl::info::device::name>();
-        spdlog::info("device: {}", device_name);
-        time_result = time_device_init.get_elapsed_ns();
-        mark_time(timestamps, time_result, "queue creation");
-        // sycl queue creation::end
-
-        // Warshall procedure
-        // step 1: copy adjacency matrix C into matrix A
-        // for i := 1 to n do 
-        //   for j := 1 to n do
-        //     A[i, j] := C[i, j]
-        //
-        q.submit([&](auto &h) {
-            const sycl::accessor C(C_buf, h, sycl::read_only);
-            const sycl::accessor A(A_buf, h, sycl::write_only);
-
-            h.parallel_for(sycl::range(n, n), [=](auto index) {
-                A[index] = C[index];
-
-            });
-        });
-
-        // Warshall procedure
-        // step 2: compute the transitive closure of C as A
-        //
-        // for k := 1 to n do
-        //   for i := 1 to n do
-        //     for j := 1 to n do
-        //       if A[i, j] = false then
-        //         A[i, j] := A[i, k] and A[k, j]
-        //
-        // note: A[index] = A[i][j]
-        //
-        for (int k = 0; k < n; k++) {
-
-            q.submit([&](auto &h) {
-                const sycl::accessor A(A_buf, h, sycl::write_only);
-                
-                h.parallel_for(sycl::range(n, n), [=](auto index) {
-                    A[index] = A[index] || A[index[0]][k] * A[k][index[1]];
-
-                });
-            });
-        }
-        // access flag buff to initiate work on device::begin
-        spdlog::info("preparing flag access");
-        plf::nanotimer time_parallel;
-        time_parallel.start();
-        const sycl::host_accessor flag{flag_buf};
-        time_result = time_parallel.get_elapsed_ns();
-        mark_time(timestamps, time_result, "Warshall procedure step 1 & 2");
-        // access flag buff to initiate work on device::end
-        
-        if (print) {
-            const sycl::host_accessor A{A_buf};
-            print_matrix(A, n);
-        }
-    }
-    // run parallel::end
-    
-    time_result = time_total.get_elapsed_ns();
-    mark_time(timestamps, time_result, "total time");
-    print_timestamps(timestamps);
-    spdlog::info("all done");
-    return 0;
-}
\ No newline at end of file