From 16aad4b87ad6337cd77d8dfafa9c2742fd3383e1 Mon Sep 17 00:00:00 2001
From: yinying-lisa-li <yinyingli@google.com>
Date: Thu, 15 Aug 2024 18:22:29 +0000
Subject: [PATCH 1/3] add parallelization to mpact

---
 python/mpact/mpactbackend.py | 30 +++++++++++++++++++++------
 test/CMakeLists.txt          |  5 +++++
 test/python/parallel.py      | 40 ++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 6 deletions(-)
 create mode 100644 test/python/parallel.py

diff --git a/python/mpact/mpactbackend.py b/python/mpact/mpactbackend.py
index 425413a..ec944fa 100644
--- a/python/mpact/mpactbackend.py
+++ b/python/mpact/mpactbackend.py
@@ -247,10 +247,12 @@ def invoke(*args):
             "func.func(refback-munge-memref-copy)",
             "func.func(convert-linalg-to-loops)",
             "func.func(lower-affine)",
+            "convert-scf-to-openmp{{{omp_options}}}",
             "convert-scf-to-cf",
             "func.func(refback-expand-ops-for-llvm)",
             "func.func(arith-expand)",
             "func.func(convert-math-to-llvm)",
+            "convert-openmp-to-llvm",
             "convert-math-to-libm",
             "expand-strided-metadata",
             "finalize-memref-to-llvm",
@@ -276,9 +278,13 @@ def invoke(*args):
 class MpactBackendCompiler:
     """Main entry-point for the MPACT backend compiler."""
 
-    def __init__(self, opt_level, use_sp_it):
+    def __init__(self, opt_level, use_sp_it, parallel,
+                 enable_ir_printing, num_threads):
         self.opt_level = opt_level
         self.use_sp_it = use_sp_it
+        self.parallel = parallel
+        self.enable_ir_printing = enable_ir_printing
+        self.num_threads = num_threads
 
     def compile(self, imported_module: Module) -> MpactCompiledArtifact:
         sp_options = (
@@ -286,7 +292,13 @@ def compile(self, imported_module: Module) -> MpactCompiledArtifact:
             if self.use_sp_it
             else "vl=16 enable-simd-index32"
         )
-        LOWERING_PIPELINE = LOWERING_PIPELINE_TEMPLATE.format(sp_options=sp_options)
+        omp_options = (f"num-threads={self.num_threads}")
+        # TODO: enable the parallelization strategy
+        # once MLIR bump is completed.
+        # if self.parallel:
+        #     sp_options += f" parallelization-strategy={self.parallel}"
+        LOWERING_PIPELINE = LOWERING_PIPELINE_TEMPLATE.format(
+            sp_options=sp_options, omp_options=omp_options)
         """Compiles an imported module, with a flat list of functions.
         The module is expected to be in linalg-on-tensors + scalar code form.
 
@@ -299,7 +311,7 @@ def compile(self, imported_module: Module) -> MpactCompiledArtifact:
             imported_module,
             LOWERING_PIPELINE,
             "Lowering Linalg-on-Tensors IR to LLVM with MpactBackendCompiler",
-            enable_ir_printing=False,
+            enable_ir_printing=self.enable_ir_printing,
         )
         return imported_module
 
@@ -461,7 +473,9 @@ def export_and_import(f, *args, **kwargs):
     return fx_importer.module
 
 
-def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False, **kwargs):
+def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False, 
+                      parallel="none", enable_ir_printing=False,
+                      num_threads = 1, **kwargs):
     """This method compiles the given callable using the MPACT backend."""
     # Import module and lower into Linalg IR.
     module = export_and_import(f, *args, **kwargs)
@@ -473,10 +487,14 @@ def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False, **kwargs):
             "torch-backend-to-linalg-on-tensors-backend-pipeline)"
         ),
         "Lowering TorchFX IR -> Linalg IR",
-        enable_ir_printing=False,
+        enable_ir_printing=enable_ir_printing,
     )
     # Compile with MPACT backend compiler.
-    backend = MpactBackendCompiler(opt_level=opt_level, use_sp_it=use_sp_it)
+    backend = MpactBackendCompiler(opt_level=opt_level,
+                                   use_sp_it=use_sp_it,
+                                   parallel=parallel,
+                                   enable_ir_printing=enable_ir_printing,
+                                   num_threads=num_threads)
     compiled = backend.compile(module)
     invoker = backend.load(compiled)
     return invoker, f
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 43c3ab9..30b9164 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -23,4 +23,9 @@ add_lit_testsuite(check-mpact "Running the MPACT regression tests"
         )
 set_target_properties(check-mpact PROPERTIES FOLDER "Tests")
 
+# TODO: find omp library.
+find_package(OpenMP REQUIRED)
+add_compile_options(${OpenMP_CXX_FLAGS})
+# target_link_libraries(check-mpact OpenMP::OpenMP_CXX)
+
 add_lit_testsuites(MPACT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${TORCH_MLIR_TEST_DEPENDS})
diff --git a/test/python/parallel.py b/test/python/parallel.py
new file mode 100644
index 0000000..edcf859
--- /dev/null
+++ b/test/python/parallel.py
@@ -0,0 +1,40 @@
+# RUN: %PYTHON -s %s 2>&1 | FileCheck %s
+
+import gc
+import sys
+import torch
+import numpy as np
+
+from mpact.mpactbackend import mpact_jit
+
+from mpact.models.kernels import MMNet
+
+
+def run_test(f, *args, **kwargs):
+    print("TEST:", f.__name__, file=sys.stderr)
+    f(*args, **kwargs)
+    gc.collect()
+
+net = MMNet()
+
+# Construct dense and sparse matrices.
+X = torch.arange(0, 16, dtype=torch.float32).view(4, 4)
+Y = torch.arange(16, 32, dtype=torch.float32).view(4, 4)
+A = torch.tensor(
+    [
+        [0.0, 1.0, 0.0, 0.0],
+        [0.0, 0.0, 0.0, 2.0],
+        [0.0, 0.0, 0.0, 0.0],
+        [3.0, 0.0, 0.0, 0.0],
+    ],
+    dtype=torch.float32,
+)
+S = A.to_sparse_csr()
+
+# Run it with MPACT.
+# TODO: enable the check test.
+# C-HECK: omp.parallel
+# CHECK: openmp
+run_test(mpact_jit, net, X, Y,
+         parallel="any-storage-any-loop", enable_ir_printing=True,
+         num_threads=10)

From 546c875cc6c65ba421b6c32b80de5f16e75605ca Mon Sep 17 00:00:00 2001
From: yinying-lisa-li <yinyingli@google.com>
Date: Thu, 15 Aug 2024 18:33:14 +0000
Subject: [PATCH 2/3] document the OpenMP runtime build flag in README

---
 README.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/README.md b/README.md
index 1488ef8..f1f63db 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,12 @@ To speed up the build process, you can set up [ccache](https://ccache.dev/downlo
 -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
 ```
 
+To enable parallelization with the OpenMP runtime, add the following flag to the command above:
+
+```shell
+-DLLVM_ENABLE_RUNTIMES=openmp
+```
+
 Run the following to ensure the MPACT compiler builds and runs correctly.
 
 ```shell

From 720d63bca49ff6a5a1a01bb4b18b0d48c98a1d81 Mon Sep 17 00:00:00 2001
From: yinying-lisa-li <yinyingli@google.com>
Date: Thu, 15 Aug 2024 19:57:30 +0000
Subject: [PATCH 3/3] format; disable OpenMP lookup in test CMake

---
 python/mpact/mpactbackend.py | 33 +++++++++++++++++++++------------
 test/CMakeLists.txt          |  4 ++--
 test/python/parallel.py      | 13 ++++++++++---
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/python/mpact/mpactbackend.py b/python/mpact/mpactbackend.py
index ec944fa..0d358d0 100644
--- a/python/mpact/mpactbackend.py
+++ b/python/mpact/mpactbackend.py
@@ -278,8 +278,7 @@ def invoke(*args):
 class MpactBackendCompiler:
     """Main entry-point for the MPACT backend compiler."""
 
-    def __init__(self, opt_level, use_sp_it, parallel,
-                 enable_ir_printing, num_threads):
+    def __init__(self, opt_level, use_sp_it, parallel, enable_ir_printing, num_threads):
         self.opt_level = opt_level
         self.use_sp_it = use_sp_it
         self.parallel = parallel
@@ -292,13 +291,14 @@ def compile(self, imported_module: Module) -> MpactCompiledArtifact:
             if self.use_sp_it
             else "vl=16 enable-simd-index32"
         )
-        omp_options = (f"num-threads={self.num_threads}")
+        omp_options = f"num-threads={self.num_threads}"
         # TODO: enable the parallelization strategy
         # once MLIR bump is completed.
         # if self.parallel:
         #     sp_options += f" parallelization-strategy={self.parallel}"
         LOWERING_PIPELINE = LOWERING_PIPELINE_TEMPLATE.format(
-            sp_options=sp_options, omp_options=omp_options)
+            sp_options=sp_options, omp_options=omp_options
+        )
         """Compiles an imported module, with a flat list of functions.
         The module is expected to be in linalg-on-tensors + scalar code form.
 
@@ -473,9 +473,16 @@ def export_and_import(f, *args, **kwargs):
     return fx_importer.module
 
 
-def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False, 
-                      parallel="none", enable_ir_printing=False,
-                      num_threads = 1, **kwargs):
+def mpact_jit_compile(
+    f,
+    *args,
+    opt_level=2,
+    use_sp_it=False,
+    parallel="none",
+    enable_ir_printing=False,
+    num_threads=1,
+    **kwargs,
+):
     """This method compiles the given callable using the MPACT backend."""
     # Import module and lower into Linalg IR.
     module = export_and_import(f, *args, **kwargs)
@@ -490,11 +497,13 @@ def mpact_jit_compile(f, *args, opt_level=2, use_sp_it=False,
         enable_ir_printing=enable_ir_printing,
     )
     # Compile with MPACT backend compiler.
-    backend = MpactBackendCompiler(opt_level=opt_level,
-                                   use_sp_it=use_sp_it,
-                                   parallel=parallel,
-                                   enable_ir_printing=enable_ir_printing,
-                                   num_threads=num_threads)
+    backend = MpactBackendCompiler(
+        opt_level=opt_level,
+        use_sp_it=use_sp_it,
+        parallel=parallel,
+        enable_ir_printing=enable_ir_printing,
+        num_threads=num_threads,
+    )
     compiled = backend.compile(module)
     invoker = backend.load(compiled)
     return invoker, f
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 30b9164..b93e6e2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,8 +24,8 @@ add_lit_testsuite(check-mpact "Running the MPACT regression tests"
 set_target_properties(check-mpact PROPERTIES FOLDER "Tests")
 
 # TODO: find omp library.
-find_package(OpenMP REQUIRED)
-add_compile_options(${OpenMP_CXX_FLAGS})
+# find_package(OpenMP REQUIRED)
+# add_compile_options(${OpenMP_CXX_FLAGS})
 # target_link_libraries(check-mpact OpenMP::OpenMP_CXX)
 
 add_lit_testsuites(MPACT ${CMAKE_CURRENT_SOURCE_DIR} DEPENDS ${TORCH_MLIR_TEST_DEPENDS})
diff --git a/test/python/parallel.py b/test/python/parallel.py
index edcf859..427155a 100644
--- a/test/python/parallel.py
+++ b/test/python/parallel.py
@@ -15,6 +15,7 @@ def run_test(f, *args, **kwargs):
     f(*args, **kwargs)
     gc.collect()
 
+
 net = MMNet()
 
 # Construct dense and sparse matrices.
@@ -35,6 +36,12 @@ def run_test(f, *args, **kwargs):
 # TODO: enable the check test.
 # C-HECK: omp.parallel
 # CHECK: openmp
-run_test(mpact_jit, net, X, Y,
-         parallel="any-storage-any-loop", enable_ir_printing=True,
-         num_threads=10)
+run_test(
+    mpact_jit,
+    net,
+    X,
+    Y,
+    parallel="any-storage-any-loop",
+    enable_ir_printing=True,
+    num_threads=10,
+)