diff --git a/src/scripts/autohecbench.py b/src/scripts/autohecbench.py index c1b94b36b7..f66d2c40e1 100755 --- a/src/scripts/autohecbench.py +++ b/src/scripts/autohecbench.py @@ -8,6 +8,12 @@ import logging import traceback +class Status(): + FAILED = "failed" + SUCCESS = "success" + SKIPPED = "skipped" + NOT_EVALUATED = "not_evaluated" + def await_input(prompt: str, is_valid_input) -> str: """ Wait the user for input until it is valid. """ r = input(prompt) @@ -16,7 +22,7 @@ def await_input(prompt: str, is_valid_input) -> str: return r class Benchmark: - def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert = False): + def __init__(self, args, name, res_regex, verif_info, run_args = [], binary = "main", invert = False): if name.endswith('sycl'): logging.info(f"Type of SYCL device to use: {args.sycl_type}") self.MAKE_ARGS = ['GCC_TOOLCHAIN="{}"'.format(args.gcc_toolchain)] @@ -49,6 +55,9 @@ def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert else: self.MAKE_ARGS = [] + if(args.verify ): + self.MAKE_ARGS.append('VERIFY=yes') + if args.compiler_name: self.MAKE_ARGS.append('CC={}'.format(args.compiler_name)) @@ -64,11 +73,16 @@ def __init__(self, args, name, res_regex, run_args = [], binary = "main", invert self.name = name self.binary = binary self.res_regex = res_regex + self.verif_info = verif_info self.args = run_args self.invert = invert self.clean = args.clean self.verbose = args.verbose + self.compilation_status = Status.NOT_EVALUATED + self.run_status = Status.NOT_EVALUATED + self.verification_status = Status.NOT_EVALUATED + def compile(self, shared_data): if self.clean: subprocess.run(["make", "clean"], cwd=self.path).check_returncode() @@ -83,7 +97,7 @@ def compile(self, shared_data): try: proc.check_returncode() - shared_data[self.name] = "success" + shared_data[self.name] = Status.SUCCESS except subprocess.CalledProcessError as e: print(f'Failed compilation in {self.path}.\n{e}') if e.stderr: @@ 
-96,13 +110,13 @@ def compile(self, shared_data): print(cause.stdout) print(cause.stderr) print("*****************************************************************************************") - shared_data[self.name] = "failed" + shared_data[self.name] = Status.FAILED #raise(e) if self.verbose: print(proc.stdout) - def run(self): + def run(self, verify = False): cmd = ["./" + self.binary] + self.args proc = subprocess.run(cmd, cwd=self.path, timeout=600, stdout=subprocess.PIPE, encoding="utf-8") @@ -118,10 +132,34 @@ def run(self): print("Position:", e.pos) logging.debug(f'Results of re.findall:\n {res}') if not res: + self.run_status = Status.FAILED raise Exception(self.path + ":\nno regex match for " + self.res_regex + " in\n" + out) + self.run_status = Status.SUCCESS res = sum([float(i) for i in res]) #in case of multiple outputs sum them (e.g. total time) if self.invert: res = 1/res + + if(verify != True): + return res + + verif_type = self.verif_info[0] + verif_args = self.verif_info[1] + + if (verif_type == "no_verification"): + self.verification_status = Status.SKIPPED + + elif (verif_type == "validation_token"): + reg_success = verif_args[0] + reg_fail = verif_args[1] + + match_success = re.findall(reg_success, out) + match_fail = re.findall(reg_fail, out) + + if( match_fail == [] and match_success != [] ): + self.verification_status = Status.SUCCESS + else: + self.verification_status = Status.FAILED + return res @@ -144,6 +182,8 @@ def main(): help='Repeat benchmark run') parser.add_argument('--warmup', '-w', type=bool, default=True, help='Run a warmup iteration') + parser.add_argument('--verify', type=bool, default=True, + help='verify benchmark results') parser.add_argument('--sycl-type', '-t', choices=['cuda', 'hip', 'opencl', 'cpu'], default='cuda', help='Type of SYCL device to use (default is cuda)') parser.add_argument('--nvidia-sm', type=int, default=60, @@ -279,7 +319,8 @@ def main(): # record the status only when it is in the input benchmark list 
ch_index = bench.find('-') if bench[:ch_index] in benchmarks.keys(): - summary[bench]["run"] = "skipped" + summary[bench]["run"] = Status.SKIPPED + summary[bench]["verification"] = Status.SKIPPED outfile.seek(0, 2) # seek to end of the file. else: outfile = open(args.output, 'w+t') @@ -296,19 +337,20 @@ def main(): try: print(f"running {i}/{len(filtered_benches)}: {b.name}", flush=True) - if args.warmup: - b.run() + if args.warmup or args.verify: + b.run(verify=args.verify) res = [] for i in range(args.repeat): - res.append(str(b.run())) + res.append(str(b.run(verify=False))) print(b.name + "," + ", ".join(res), file=outfile) - summary[b.name]["run"] = "success" except Exception as e: print("Error running: ", b.name) print(e) - summary[b.name]["run"] = "failed" + + summary[b.name]["run"] = b.run_status + summary[b.name]["verification"] = b.verification_status if args.output: outfile.close() @@ -325,12 +367,15 @@ def main(): logging.info(f"Wrote the summary to {args.summary}.") else: print(json.dumps(summary, indent=4, sort_keys=True)) - res = sum(('compile' in x.keys() and x['compile'] == 'failed' or - 'run' in x.keys() and x['run'] == 'failed') for x in summary.values()) - print(f'Number of benchmark compile or run failures: {res}'); + failed_compile_run = sum(('compile' in x.keys() and x['compile'] == Status.FAILED or + 'run' in x.keys() and x['run'] == Status.FAILED) for x in summary.values()) + print(f'Number of benchmark compile or run failures: {failed_compile_run}') + + failed_verif = sum(('verification' in x.keys() and x['verification'] == Status.FAILED ) + for x in summary.values()) + print(f'Number of benchmark verification failures: {failed_verif}') print("*****************************************************************************************") if __name__ == "__main__": main() - diff --git a/src/scripts/benchmarks/subset.json b/src/scripts/benchmarks/subset.json index ec3771d2ce..612b9f1c89 100644 --- a/src/scripts/benchmarks/subset.json +++ 
b/src/scripts/benchmarks/subset.json @@ -1,6 +1,13 @@ { "accuracy": [ "(?:Average execution time of accuracy kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8192", "10000", @@ -10,12 +17,26 @@ ], "ace": [ "(?:Offload time: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "adam": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000", "200", @@ -24,6 +45,10 @@ ], "adv": [ "(?:elapsed time=)([0-9.+-e]+)(?: )", + [ + "no_verification", + [] + ], [ "16", "16", @@ -32,6 +57,10 @@ ], "axhelm": [ "(?:elapsed time=)([0-9.+-e]+)(?: )", + [ + "no_verification", + [] + ], [ "3", "8000", @@ -40,6 +69,13 @@ ], "aidw": [ "(?:Average execution time of AIDW_Kernel_Tiled )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10", "1", @@ -48,18 +84,33 @@ ], "all-pairs-distance": [ "(?:Average kernel execution time \\(w/o? 
shared memory\\): )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000" ] ], "aobench": [ "(?:Average render time: )([0-9.+-e]+)(?: sec)", + [ + "no_verification", + [] + ], [ "1000" ] ], "aop": [ "(?:elapsed time for each run *: )([0-9.+-e]+)(?:ms)", + [ + "no_verification", + [] + ], [ "-paths", "200" @@ -67,6 +118,13 @@ ], "asmooth": [ "(?:Average filtering time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000", "1", @@ -75,25 +133,57 @@ ] ], "assert": [ - "(?:Kernel time : )([0-9.+-e]+)" + "(?:Kernel time : )([0-9.+-e]+)", + [ + "no_verification", + [] + ] ], "asta": [ - "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))" + "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ] ], "atomicAggregate": [ "(?:Total kernel time \\(32 locations\\): )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000" ] ], "atomicCAS": [ "(?:Average execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000" ] ], "atomicCost": [ "(?:Average execution time of WithoutAtomicOnGlobalMem: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "16", "10" @@ -101,16 +191,31 @@ ], "atomicPerf": [ "(?:Average execution time of SingleRangeAtomicOnGlobalMem: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "10" ] ], "atomicReduction": [ "(?:The average performance of reduction is )([0-9.+-e]+)(?: GBytes/sec)", + [ + "validation_token", + [ + "VERIFICATION: PASS", + "VERIFICATION: FAIL!!" 
+ ] + ], [] ], "attention": [ "(?:Average execution time of kernels )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "8192", "8192", @@ -120,18 +225,33 @@ ], "backprop": [ "(?:Device offloading time = )([0-9.+-e]+)(?:\\(s\\))", + [ + "no_verification", + [] + ], [ "65536" ] ], "background-subtract": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "4096", "2048", "1", "102" ] ], "bezier-surface": [ "(?:kernel execution time: )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "-n", "8192" @@ -139,6 +259,10 @@ ], "bh": [ "(?:Total kernel execution time: )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ], [ "300", "30" @@ -146,6 +270,13 @@ ], "bilateral": [ "(?:Average kernel execution time \\([1-9]x[1-9]\\) )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "2960", "1440", @@ -155,26 +286,55 @@ ] ], "binomial": [ - "(?:Total binomialOptionsGPU\\(\\) time: )([0-9.+-e]+)(?: msec)" + "(?:Total binomialOptionsGPU\\(\\) time: )([0-9.+-e]+)(?: msec)", + [ + "validation_token", + [ + "Test passed", + "Test failed!" 
+ ] + ] ], "bitonic-sort": [ "(?:Total kernel execution time: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "25", "2" ] ], "bitpacking": [ - "(?:Device offload time = )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))" + "(?:Device offload time = )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ] ], "black-scholes": [ "(?:Processing time on GPU: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "100" ] ], "bn": [ "(?:Average execution time of genScoreKernel: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "result", "10" @@ -182,12 +342,23 @@ ], "bonds": [ "(?:Average processing time on GPU: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "100" ] ], "boxfilter": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "../boxfilter-sycl/data/lenaRGB.ppm", "10000" @@ -195,16 +366,34 @@ ], "bsearch": [ "(?:Average kernel execution time \\(bs[1-9]\\) )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "", + "incorrect result" + ] + ], [ "16384", "1" ] ], "bspline-vgh": [ - "(?:Total kernel execution time )([0-9.+-e]+)(?: \\(s\\))" + "(?:Total kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ] ], "burger": [ "(?:Total kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8193", "8193", @@ -213,12 +402,23 @@ ], "bwt": [ "(?:Device time: )([0-9.+-e]+)(?: (?:s|ms|us))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000000" ] ], "cbsfil": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "10000", "10000", @@ -227,6 +427,10 @@ ], "ccsd-trpdrv": [ "(?:avg=)([0-9.+-e]+)", + [ + "no_verification", + [] + ], [ "200", "200" @@ -234,6 +438,13 @@ ], "channelShuffle": [ "(?:Average time of channel shuffle \\(\\w+\\): 
)([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "Average time of channel shuffle", + "Failed to execute channel shuffle" + ] + ], [ "2", "224", @@ -243,6 +454,13 @@ ], "channelSum": [ "(?:Average time of channel sum \\(\\w+\\): )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "224", "224", @@ -251,6 +469,10 @@ ], "chi2": [ "(?:Average kernel execution time = )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "4000", "400000", @@ -262,19 +484,38 @@ ], "chemv": [ "(?:Average execution time of chemv kernels: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASSED", + "FAILED" + ] + ], [] ], "clenergy": [ - "(?:Kernel time: )([0-9.+-e]+)(?: seconds)" + "(?:Kernel time: )([0-9.+-e]+)(?: seconds)", + [ + "no_verification", + [] + ] ], "clink": [ "(?:Average kernel time: )([0-9.+-e]+)(?: ms)", + [ + "no_verification", + [] + ], [ "10" ] ], "cmp": [ "(?: Giga semblances traces per second: )([0-9.+-e]+)", + [ + "no_verification", + [] + ], [ "-c0", "1.98e-7","-c1","1.77e-6","-nc","5","-aph","600","-tau","0.002","-v","2", "-i", "../cmp-cuda/data/simple-synthetic.su" @@ -282,6 +523,10 @@ ], "cobahh": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "1000000", "10" @@ -289,6 +534,13 @@ ], "complex": [ "(?:Average kernel execution time \\([a-z]+\\) )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000000", "1000" @@ -296,30 +548,59 @@ ], "compute-score": [ "(?:Kernel Time = )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "-p=1000" ] ], "concat": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "1000" ] ], "concurrentKernels": [ "(?:Measured time for sample = )([0-9.+-e]+)(?:s)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "4" ] ], "convolution3D": [ "(?:Average kernel execution time of conv3d_s3 kernel: 
)([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "32","6","16","14","14","5","100" ] ], "convolutionSeparable": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8192", "8192", @@ -328,6 +609,13 @@ ], "cooling": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000000", "1000" @@ -335,6 +623,13 @@ ], "cross": [ "(?:Average execution time of cross[0-9]+ kernel: )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000000", "100" @@ -342,12 +637,23 @@ ], "crossEntropy": [ "(?:Average GPU kernel time \\(ms\\) : )([0-9.+-e]+)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "crs": [ "(?:Total encoding time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "1", "1" @@ -355,10 +661,21 @@ ], "damage": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], ["50000000", "1000"] ], "dct8x8": [ "(?:Average DCT8x8 kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8192", "8192", @@ -367,10 +684,24 @@ ], "degrid": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [] ], "dense-embedding": [ "(?:Average execution time of dense embedding kernel \\(k1\\): )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100000", "256", @@ -379,6 +710,13 @@ ], "dp": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000000", "10" @@ -386,6 +724,10 @@ ], "dwconv": [ "(?:Average execution time of dwconv2d_forward kernel: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "128", "16", @@ -396,6 +738,13 @@ ], "dxtc2": [ "(?:Average kernel 
execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "../dxtc2-sycl/data/lena_std.ppm", "../dxtc2-sycl/data/lena_ref.dds", @@ -404,6 +753,13 @@ ], "ecdh": [ "(?:: )([0-9.+-e]+)(?: s)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "50000000", "100" @@ -411,6 +767,13 @@ ], "eigenvalue": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "2048", "10000" @@ -418,6 +781,13 @@ ], "entropy": [ "(?:execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8192", "8192", @@ -426,6 +796,13 @@ ], "epistasis": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "146", "31339", @@ -434,6 +811,10 @@ ], "ert": [ "(?:runtime \\([a-z1-9]*\\): )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "128", "1024" @@ -441,6 +822,10 @@ ], "expdist": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "10000", "10" @@ -448,18 +833,39 @@ ], "extend2": [ "(?:Average offload time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "", + "Error:" + ] + ], [ "300" ] ], "f16sp": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100000" ] ], "fdtd3d": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "--dimx=192", "--dimy=184", @@ -468,6 +874,13 @@ ], "fft": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "3", "100" @@ -475,6 +888,13 @@ ], "filter": [ "(?:Average execution time of filter \\(shared memory\\) )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100000000", "256", @@ -483,6 +903,10 @@ ], 
"flip": [ "(?:Average execution time of the flip kernel: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "3", "1024", @@ -491,6 +915,13 @@ ], "floydwarshall": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1024", "100", @@ -499,6 +930,13 @@ ], "fpc": [ "(?:fpc2: average device offload time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "256", "100" @@ -506,18 +944,39 @@ ], "fsm": [ "([0-9.+-e]+)(?:\\s*#runtime \\[s\\])", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "65536" ] ], "fwt": [ "(?:Average device execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "gabor": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8640", "15360", @@ -526,6 +985,13 @@ ], "gamma-correction": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "7680", "4320", @@ -535,6 +1001,13 @@ ], "gaussian": [ "(?:Device offloading time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "-q", "-t", @@ -544,7 +1017,11 @@ ], "gd": [ "(?:Training time takes )([0-9.+-e]+)(?: \\(s\\))", - [ + [ + "no_verification", + [] + ], + [ "../gd-cuda/gisette_scale", "0.0001", "10", @@ -553,6 +1030,10 @@ ], "gelu": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "512", "8192", @@ -562,12 +1043,23 @@ ], "geodesic": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "1000" ] ], "glu": [ "(?:Average execution time of GLU kernel \\(split dimension = 2\\): )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "3", "1024", @@ -576,22 +1068,50 @@ ], "goulash": [ "(?:total kernel time 
)([0-9.+-e]+)(?:\\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "101", "5" ] ], "gpp": [ - "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))" + "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "SUCCESS", + "FAILURE\n" + ] + ] ], "haccmk": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000" ] ], "hausdorff": [ "(?:Average execution time of kernels: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100000", "100000", @@ -600,6 +1120,10 @@ ], "heat": [ "(?:Solve time \\(s\\): )([0-9.+-e]+)", + [ + "no_verification", + [] + ], [ "4096", "1000" @@ -607,6 +1131,10 @@ ], "heat2d": [ "([0-9.+-e]+)(?: usec/iter)", + [ + "no_verification", + [] + ], [ "1024", "1024", @@ -615,12 +1143,23 @@ ], "knn": [ "(?:100 iterations \\()([0-9.+-e]+)(?: s by iteration\\))", + [ + "no_verification", + [] + ], [ "100" ] ], "histogram": [ "(?:Avg time )([0-9.+-e]+)(?: us )", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "--width=3840", "--height=2160" @@ -628,14 +1167,32 @@ ], "hmm": [ "(?:Device execution time of Viterbi iterations )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "Success", + "Fail" + ] + ], [] ], "hotspot3D": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], ["512", "8", "5000", "../data/hotspot3D/power_512x8", "../data/hotspot3D/temp_512x8", "output.out"] ], "hwt1d": [ "(?:Average device offload time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "8388608", "100" @@ -643,6 +1200,13 @@ ], "hybridsort": [ "(?:GPU execution time: )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "PASSED", + "FAILED" + ] + ], [ "r" ], @@ -650,16 +1214,34 @@ ], "inversek2j": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + 
"FAIL" + ] + ], [ "../inversek2j-sycl/coord_in.txt", "100000" ] ], "ising": [ - "(?:elapsed time: )([0-9.+-e]+)(?: sec)" + "(?:elapsed time: )([0-9.+-e]+)(?: sec)", + [ + "no_verification", + [] + ] ], "iso2dfd": [ "(?:Total kernel execution time )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "", + "ERROR:" + ] + ], [ "2048", "2048", @@ -668,13 +1250,31 @@ ], "lid-driven-cavity": [ "(?:Total execution time of the iteration loop: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [] ], "jacobi": [ - "(?:Total elapsed time: )([0-9.+-e]+)(?: seconds)" + "(?:Total elapsed time: )([0-9.+-e]+)(?: seconds)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ] ], "jenkins-hash": [ "(?:Average kernel execution time : )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "256", "16777216", @@ -682,7 +1282,11 @@ ] ], "laplace": [ - "(?:iterations: )([0-9.+-e]+)(?: s)" + "(?:iterations: )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ] ], "laplace3d": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", @@ -696,6 +1300,10 @@ ], "lavaMD": [ "(?:Device offloading time:\\s)([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ], [ "-boxes1d", "16" @@ -703,18 +1311,39 @@ ], "layout": [ "(?:Average kernel execution time \\(SoA\\): )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000" ] ], "libor": [ "(?:Average kernel execution time : )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "lif": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "1000", "1000", @@ -723,6 +1352,10 @@ ], "linearprobing": [ "(?:Kernel execution time \\(iterate\\): )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "16", "8" @@ -730,6 +1363,10 @@ ], "logprob": [ "(?:Average execution time of kernels: )([0-9.+-e]+)(?: \\(us\\))", + [ + 
"no_verification", + [] + ], [ "100", "128", @@ -739,12 +1376,23 @@ ], "lombscargle": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "lr": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: us)", + [ + "no_verification", + [] + ], [ "100000", "0" @@ -752,12 +1400,23 @@ ], "mandelbrot": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "", + "Verification failure" + ] + ], [ "1000" ] ], "mask": [ "(?:Average execution time of \\w+ kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "512", "512", @@ -767,6 +1426,13 @@ ], "matrix-rotate": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "5000", "100" @@ -774,6 +1440,13 @@ ], "maxpool3d": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "2048", "2048", @@ -783,6 +1456,10 @@ ], "mcpr": [ "(?:Average execution time of compute_probs_unitStrides kernel: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "../mcpr-cuda/alphas.csv", "10" @@ -790,6 +1467,10 @@ ], "md": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "3", "1000" @@ -797,6 +1478,13 @@ ], "meanshift": [ "(?:Average execution time of mean-shift \\(opt\\) )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "../meanshift-cuda/dataset/data.csv", "../meanshift-cuda/dataset/centroids.csv" @@ -804,6 +1492,13 @@ ], "medianfilter": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "../medianfilter-sycl/data/SierrasRGB.ppm", "1000" @@ -811,19 +1506,41 @@ ], "merge": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100000", 
"100" ] ], "merkle": [ - "(?: )([0-9.+-e]+)(?: ms)" + "(?: )([0-9.+-e]+)(?: ms)", + [ + "no_verification", + [] + ] ], "minimod": [ - "(?:Average kernel time per iteration: )([0-9.+-e]+)(?: s)" + "(?:Average kernel time per iteration: )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ] ], "minisweep": [ "(?:kernel time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "--niterations", "5" @@ -831,21 +1548,43 @@ ], "minkowski": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "100" ] ], "mnist": [ "(?:Total time \\(learn \\+ test\\) )([0-9.+-e]+)(?: secs)", + [ + "no_verification", + [] + ], [ "2" ] ], "mr": [ - "(?:Total kernel execution time \\(\\w+\\s*\\): )([0-9.+-e]+)(?: \\(ms\\))" + "(?:Total kernel execution time \\(\\w+\\s*\\): )([0-9.+-e]+)(?: \\(ms\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ] ], "multinomial": [ "(?:Average execution time of sampleMultinomialOnce kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "65536", "2048", @@ -854,6 +1593,10 @@ ], "mrc": [ "(?:Average execution time of MRC2 kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "10000000", "1000" @@ -861,6 +1604,13 @@ ], "murmurhash3": [ "(?:Average kernel execution time )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "SUCCESS", + "FAIL" + ] + ], [ "100000", "100" @@ -868,12 +1618,20 @@ ], "nbnxm": [ "(?:Average kernel execution time \\(w/o shift\\): )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "100" ] ], "nbody": [ "(?:Total Time \\(s\\)[ ]*: )([0-9.+-e]+)", + [ + "no_verification", + [] + ], [ "20000", "100" @@ -881,6 +1639,10 @@ ], "nlll": [ "(?:Average execution time of nll loss forward kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "2048", "1000", @@ -888,10 +1650,21 @@ ] ], "overlap": [ - "(?:Avg. 
time when overlapped using [0-9]+ streams.*: )([0-9.+-e]+)(?: ms)" + "(?:Avg. time when overlapped using [0-9]+ streams.*: )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ] ], "overlay": [ "(?:Total kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "640", "480" @@ -899,12 +1672,23 @@ ], "p4": [ "(?:Average execution time of postprocess kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "10000" ] ], "pad": [ "(?:Total padding execution time for [0-9.+-e]+ iterations: )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "Test Passed", + "Test failed" + ] + ], [ "-a", "0.1" @@ -912,6 +1696,10 @@ ], "page-rank": [ "(?:Total kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "1000", "1000" @@ -919,6 +1707,10 @@ ], "particle-diffusion": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "10", "5" @@ -926,6 +1718,10 @@ ], "particlefilter": [ "(?:Device offloading time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "-x", "200", @@ -939,6 +1735,10 @@ ], "pathfinder": [ "(?:Total kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "1000", "1000", @@ -947,6 +1747,10 @@ ], "permute": [ "(?:block_size 256 | time )([0-9.+-e]+)(?: ms)", + [ + "no_verification", + [] + ], [ "8", "100" @@ -954,6 +1758,10 @@ ], "perplexity": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "10000", "50", @@ -961,6 +1769,10 @@ ], "pns": [ "(?:Total device execution time: )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ], [ "5000", "100", @@ -969,6 +1781,10 @@ ], "pointwise": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "100", "8", @@ -979,12 +1795,23 @@ ], "projectile": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + 
"no_verification", + [] + ], [ "1000" ] ], "rsc": [ "(?:Total task execution time for [0-9.+-e]+ iterations: )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "Test Passed", + "Test failed" + ] + ], [ "-f", "../rsc-cuda/input/vectors.csv", @@ -994,6 +1821,10 @@ ], "s3d": [ "(?:Total time )([0-9.+-e]+)(?: secs)", + [ + "no_verification", + [] + ], [ "-s", "3", @@ -1003,6 +1834,13 @@ ], "sc": [ "(?:Total stream compaction time for [0-9.+-e]+ iterations: )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "Test Passed", + "Test failed" + ] + ], [ "-a", "0.1" @@ -1010,6 +1848,10 @@ ], "softmax": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ], [ "100000", "784", @@ -1018,10 +1860,21 @@ ] ], "sheath": [ - "(?:Time per time step: )([0-9.+-e]+)(?: \\(ms\\))" + "(?:Time per time step: )([0-9.+-e]+)(?: \\(ms\\))", + [ + "no_verification", + [] + ] ], "simplemoc": [ "(?:Total kernel time: )([0-9.+-e]+)(?: seconds)", + [ + "validation_token", + [ + "Success", + "Fail" + ] + ], [ "-s", "5000000", @@ -1033,6 +1886,10 @@ ], "stddev": [ "(?:Average execution time of stddev kernels: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "65536", "16384", @@ -1040,10 +1897,18 @@ ] ], "su3": [ - "(?:Total kernel execution time = )([0-9.+-e]+)(?: \\(s\\))" + "(?:Total kernel execution time = )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ] ], "swish": [ "(?:Average execution time of SwishGradient kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "10000000", "1000" @@ -1051,6 +1916,10 @@ ], "tissue": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "no_verification", + [] + ], [ "32", "100" @@ -1058,6 +1927,13 @@ ], "tqs": [ "(?:Total task execution time for [0-9.+-e]+ iterations: )([0-9.+-e]+)(?: \\((?:s|ms|us)\\))", + [ + "validation_token", + [ + "", + "Test failed" + ] + ], [ "-f", 
"../tqs-cuda/input/patternsNP100NB512FB25.txt" @@ -1065,12 +1941,23 @@ ], "tridiagonal": [ "(?:Time = )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ], [ "-num_systems=30000" ] ], "vanGenuchten": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: \\(s\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "256", "256", @@ -1080,21 +1967,43 @@ ], "vol2col": [ "(?:Average execution time of vol2col kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "no_verification", + [] + ], [ "1000" ] ], "winograd": [ - "(?:Co-execution time: )([0-9.+-e]+)(?: s)" + "(?:Co-execution time: )([0-9.+-e]+)(?: s)", + [ + "no_verification", + [] + ] ], "zerocopy": [ "(?:Average kernel execution time: )([0-9.+-e]+)(?: ms)", + [ + "validation_token", + [ + "SUCCESS", + "FAILURE" + ] + ], [ "100" ] ], "zeropoint": [ "(?:Average execution time of zero-point kernel: )([0-9.+-e]+)(?: \\(us\\))", + [ + "validation_token", + [ + "PASS", + "FAIL" + ] + ], [ "10000000", "100"