ORNL · thomasRoglin · Apr 24, 2025 · Jul 2, 2025 · Jul 2, 2025
diff --git a/src/scripts/autohecbench.py b/src/scripts/autohecbench.py
@@ -8,6 +8,12 @@
 import logging
 import traceback
 
+class Status():
+    FAILED = "failed"
+    SUCCESS = "success"
+    SKIPPED = "skipped"
+    NOT_EVALUATED = "not_evaluated"
+
 def await_input(prompt: str, is_valid_input) -> str:
     """ Wait the user for input until it is valid. """
     r = input(prompt)
@@ -16,7 +22,7 @@ def await_input(prompt: str, is_valid_input) -> str:
     return r
 
 class Benchmark:
-    def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert = False):
+    def __init__(self, args, name, res_regex, verif_info, run_args = [], binary = "main", invert = False):
         if name.endswith('sycl'):
             logging.info(f"Type of SYCL device to use: {args.sycl_type}")
             self.MAKE_ARGS = ['GCC_TOOLCHAIN="{}"'.format(args.gcc_toolchain)]
@@ -49,6 +55,9 @@ def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert
         else:
             self.MAKE_ARGS = []
 
+        if(args.verify ):
+            self.MAKE_ARGS.append('VERIFY=yes')
+
         if args.compiler_name:
             self.MAKE_ARGS.append('CC={}'.format(args.compiler_name))
 
@@ -64,11 +73,16 @@ def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert
         self.name = name
         self.binary = binary
         self.res_regex = res_regex
+        self.verif_info = verif_info
         self.args = run_args
         self.invert = invert
         self.clean = args.clean
         self.verbose = args.verbose
 
+        self.compilation_status = Status.NOT_EVALUATED
+        self.run_status = Status.NOT_EVALUATED
+        self.verification_status = Status.NOT_EVALUATED
+
     def compile(self, shared_data):
         if self.clean:
             subprocess.run(["make", "clean"], cwd=self.path).check_returncode()
@@ -83,7 +97,7 @@ def compile(self, shared_data):
 
         try:
             proc.check_returncode()
-            shared_data[self.name] = "success"
+            shared_data[self.name] = Status.SUCCESS
         except subprocess.CalledProcessError as e:
             print(f'Failed compilation in {self.path}.\n{e}')
             if e.stderr:
@@ -96,13 +110,13 @@ def compile(self, shared_data):
             print(cause.stdout)
             print(cause.stderr)
             print("*****************************************************************************************")
-            shared_data[self.name] = "failed"
+            shared_data[self.name] = Status.FAILED
             #raise(e)
 
         if self.verbose:
             print(proc.stdout)
 
-    def run(self):
+    def run(self, verify = False):
         cmd = ["./" + self.binary] + self.args
         proc = subprocess.run(cmd, cwd=self.path, timeout=600,
                               stdout=subprocess.PIPE, encoding="utf-8")
@@ -118,10 +132,34 @@ def run(self):
              print("Position:", e.pos)
         logging.debug(f'Results of re.findall:\n {res}')
         if not res:
+            self.run_status = Status.FAILED
             raise Exception(self.path + ":\nno regex match for " + self.res_regex + " in\n" + out)
+        self.run_status = Status.SUCCESS
         res = sum([float(i) for i in res]) #in case of multiple outputs sum them (e.g. total time)
         if self.invert:
             res = 1/res
+
+        if(verify != True):
+            return res
+
+        verif_type = self.verif_info[0]
+        verif_args = self.verif_info[1]
+
+        if (verif_type == "no_verification"):
+            self.verification_status = Status.SKIPPED
+
+        elif (verif_type == "verification_token"):
+            reg_success = verif_args[0]
+            reg_fail = verif_args[1]
+
+            match_success = re.findall(reg_success, out)
+            match_fail = re.findall(reg_fail, out)
+
+            if( match_fail == [] and match_success != [] ):
+                self.verification_status = Status.SUCCESS
+            else:
+                self.verification_status = Status.FAILED
+
         return res
 
 
@@ -144,6 +182,8 @@ def main():
                         help='Repeat benchmark run')
     parser.add_argument('--warmup', '-w', type=bool, default=True,
                         help='Run a warmup iteration')
+    parser.add_argument('--verify', type=bool, default=True,
+                        help='verify benchmark results')
     parser.add_argument('--sycl-type', '-t', choices=['cuda', 'hip', 'opencl', 'cpu'], default='cuda',
                         help='Type of SYCL device to use (default is cuda)')
     parser.add_argument('--nvidia-sm', type=int, default=60,
@@ -279,7 +319,8 @@ def main():
                     # record the status only when it is in the input benchmark list
                     ch_index = bench.find('-')
                     if bench[:ch_index] in benchmarks.keys():
-                        summary[bench]["run"] = "skipped"
+                        summary[bench]["run"] = Status.SKIPPED
+                        summary[bench]["verification"] = Status.SKIPPED
                 outfile.seek(0, 2) # seek to end of the file.
             else:
                 outfile = open(args.output, 'w+t')
@@ -296,19 +337,20 @@ def main():
         try:
             print(f"running {i}/{len(filtered_benches)}: {b.name}", flush=True)
 
-            if args.warmup:
-                b.run()
+            if args.warmup or args.verify:
+                b.run(verify=args.verify)
 
             res = []
             for i in range(args.repeat):
-                res.append(str(b.run()))
+                res.append(str(b.run(verify=False)))
 
             print(b.name + "," + ", ".join(res), file=outfile)
-            summary[b.name]["run"] = "success"
         except Exception as e:
             print("Error running: ", b.name)
             print(e)
-            summary[b.name]["run"] = "failed"
+
+        summary[b.name]["run"] = b.run_status
+        summary[b.name]["verification"] = b.verification_status
 
     if args.output:
         outfile.close()
@@ -325,12 +367,15 @@ def main():
         logging.info(f"Wrote the summary to {args.summary}.")
     else:
         print(json.dumps(summary, indent=4, sort_keys=True))
-    res = sum(('compile' in x.keys() and x['compile'] == 'failed' or
-               'run' in x.keys() and x['run'] == 'failed') for x in summary.values())
-    print(f'Number of benchmark compile or run failures: {res}');
+    failed_compile_run = sum(('compile' in x.keys() and x['compile'] == Status.FAILED or
+                'run' in x.keys() and x['run'] == Status.FAILED) for x in summary.values())
+    print(f'Number of benchmark compile or run failures: {failed_compile_run}')
+
+    failed_verif = sum(('verification' in x.keys() and x['verification'] == Status.FAILED )
+                    for x in summary.values())
+    print(f'Number of benchmark verification failures: {failed_verif}')
     print("*****************************************************************************************")
 
 
 if __name__ == "__main__":
     main()
-