diff --git a/scripts/benchmark_eval_analysis.py b/scripts/benchmark_eval_analysis.py index fe2d220c..d40170ee 100644 --- a/scripts/benchmark_eval_analysis.py +++ b/scripts/benchmark_eval_analysis.py @@ -47,6 +47,7 @@ def patch(eval_results, dataset): "runtime": -1.0, "runtime_stats": {} } + eval_results = dict(sorted(eval_results.items(), key=lambda x: int(x[0]))) return eval_results def analyze_greedy_eval(run_name, hardware, baseline, level):