From 26346af6d7178b9973e62aafcf79b0058932dc1f Mon Sep 17 00:00:00 2001
From: David Seddon <david.seddon@octoenergy.com>
Date: Tue, 8 Apr 2025 08:58:57 +0100
Subject: [PATCH 1/2] Split benchmark tests into two files

---
 tests/benchmarking/test_building_graph.py     |  70 ++++++++++++
 ..._benchmarking.py => test_graph_methods.py} | 108 ++++--------------
 2 files changed, 91 insertions(+), 87 deletions(-)
 create mode 100644 tests/benchmarking/test_building_graph.py
 rename tests/benchmarking/{test_benchmarking.py => test_graph_methods.py} (83%)

diff --git a/tests/benchmarking/test_building_graph.py b/tests/benchmarking/test_building_graph.py
new file mode 100644
index 00000000..9f854b6b
--- /dev/null
+++ b/tests/benchmarking/test_building_graph.py
@@ -0,0 +1,70 @@
+import pytest
+import json
+import importlib
+from pathlib import Path
+from grimp.adaptors.graph import ImportGraph
+import grimp
+
+
+@pytest.fixture(scope="module")
+def large_graph():
+    raw_json = (Path(__file__).parent / "large_graph.json").read_text()
+    graph_dict = json.loads(raw_json)
+    graph = ImportGraph()
+
+    for importer, importeds in graph_dict.items():
+        graph.add_module(importer)
+        for imported in importeds:
+            graph.add_import(
+                importer=importer,
+                imported=imported,
+                line_number=1,
+                line_contents=f"import {imported}",
+            )
+
+    return graph
+
+
+def test_build_django_uncached(benchmark):
+    """
+    Benchmarks building a graph of real package - in this case Django.
+
+    In this benchmark, the cache is turned off.
+    """
+    benchmark(grimp.build_graph, "django", cache_dir=None)
+
+
+def test_build_django_from_cache_no_misses(benchmark):
+    """
+    Benchmarks building a graph of real package - in this case Django.
+
+    This benchmark fully utilizes the cache.
+    """
+    # Populate the cache first, before beginning the benchmark.
+    grimp.build_graph("django")
+
+    benchmark(grimp.build_graph, "django")
+
+
+@pytest.mark.parametrize(
+    "number_of_misses",
+    (
+        2,  # Fewer than the likely number of CPUs.
+        15,  # A bit more than the likely number of CPUs.
+    ),
+)
+def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses):
+    """
+    Benchmarks building a graph of real package - in this case Django.
+
+    This benchmark utilizes the cache except for a few modules, which we add.
+    """
+    # Populate the cache first, before beginning the benchmark.
+    grimp.build_graph("django")
+    # Add a module which won't be in the cache.
+    django_path = Path(importlib.util.find_spec("django").origin).parent
+    for i in range(number_of_misses):
+        new_module = django_path / f"new_module_{i}.py"
+        new_module.write_text("from django.db import models")
+
+    benchmark(grimp.build_graph, "django")
diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_graph_methods.py
similarity index 83%
rename from tests/benchmarking/test_benchmarking.py
rename to tests/benchmarking/test_graph_methods.py
index 31cd3ef6..c676e5d2 100644
--- a/tests/benchmarking/test_benchmarking.py
+++ b/tests/benchmarking/test_graph_methods.py
@@ -1,17 +1,11 @@
 import pytest
 import json
-import importlib
 from pathlib import Path
 from grimp.adaptors.graph import ImportGraph
 from grimp import PackageDependency, Route
-import grimp
 from copy import deepcopy
 
 
-def _run_benchmark(benchmark, fn, *args, **kwargs):
-    return benchmark(fn, *args, **kwargs)
-
-
 @pytest.fixture(scope="module")
 def large_graph():
     raw_json = (Path(__file__).parent / "large_graph.json").read_text()
@@ -291,51 +285,6 @@ def large_graph():
 }
 
 
-def test_build_django_uncached(benchmark):
-    """
-    Benchmarks building a graph of real package - in this case Django.
-
-    In this benchmark, the cache is turned off.
-    """
-    _run_benchmark(benchmark, grimp.build_graph, "django", cache_dir=None)
-
-
-def test_build_django_from_cache_no_misses(benchmark):
-    """
-    Benchmarks building a graph of real package - in this case Django.
-
-    This benchmark fully utilizes the cache.
-    """
-    # Populate the cache first, before beginning the benchmark.
-    grimp.build_graph("django")
-
-    _run_benchmark(benchmark, grimp.build_graph, "django")
-
-
-@pytest.mark.parametrize(
-    "number_of_misses",
-    (
-        2,  # Fewer than the likely number of CPUs.
-        15,  # A bit more than the likely number of CPUs.
-    ),
-)
-def test_build_django_from_cache_a_few_misses(benchmark, number_of_misses):
-    """
-    Benchmarks building a graph of real package - in this case Django.
-
-    This benchmark utilizes the cache except for a few modules, which we add.
-    """
-    # Populate the cache first, before beginning the benchmark.
-    grimp.build_graph("django")
-    # Add a module which won't be in the cache.
-    django_path = Path(importlib.util.find_spec("django").origin).parent
-    for i in range(number_of_misses):
-        new_module = django_path / f"new_module_{i}.py"
-        new_module.write_text("from django.db import models")
-
-    _run_benchmark(benchmark, grimp.build_graph, "django")
-
-
 class TestFindIllegalDependenciesForLayers:
     @staticmethod
     def _remove_package_dependencies(graph, package_dependencies):
@@ -352,8 +301,7 @@ def _remove_package_dependencies(graph, package_dependencies):
         return graph
 
     def test_top_level_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_illegal_dependencies_for_layers,
             layers=TOP_LEVEL_LAYERS,
             containers=("mypackage",),
@@ -364,8 +312,7 @@ def test_top_level_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, TOP_LEVEL_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_illegal_dependencies_for_layers,
             layers=TOP_LEVEL_LAYERS,
             containers=("mypackage",),
@@ -373,50 +320,39 @@ def test_top_level_large_graph_kept(self, large_graph, benchmark):
         assert result == set()
 
     def test_deep_layers_large_graph_violated(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == DEEP_LAYER_PACKAGE_DEPENDENCIES
 
     def test_deep_layers_large_graph_kept(self, large_graph, benchmark):
         large_graph = self._remove_package_dependencies(
             large_graph, DEEP_LAYER_PACKAGE_DEPENDENCIES
         )
-        result = _run_benchmark(
-            benchmark, large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS
-        )
+        result = benchmark(large_graph.find_illegal_dependencies_for_layers, layers=DEEP_LAYERS)
         assert result == set()
 
 
 def test_find_descendants(large_graph, benchmark):
-    result = _run_benchmark(benchmark, large_graph.find_descendants, "mypackage")
+    result = benchmark(large_graph.find_descendants, "mypackage")
     assert len(result) == 28222
 
 
 def test_find_downstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_downstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 80
 
 
 def test_find_upstream_modules(large_graph, benchmark):
-    result = _run_benchmark(
-        benchmark, large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True
-    )
+    result = benchmark(large_graph.find_upstream_modules, DEEP_LAYERS[0], as_package=True)
     assert len(result) == 2159
 
 
 class TestFindShortestChain:
     def test_chain_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark, large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1]
-        )
+        result = benchmark(large_graph.find_shortest_chain, DEEP_LAYERS[0], DEEP_LAYERS[1])
         assert result is not None
 
     def test_no_chain(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chain,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -426,8 +362,7 @@ def test_no_chain(self, large_graph, benchmark):
 
 class TestFindShortestChains:
     def test_chains_found(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             DEEP_LAYERS[1],
@@ -436,8 +371,7 @@ def test_chains_found(self, large_graph, benchmark):
         assert len(result) > 0
 
     def test_no_chains(self, large_graph, benchmark):
-        result = _run_benchmark(
-            benchmark,
+        result = benchmark(
             large_graph.find_shortest_chains,
             DEEP_LAYERS[0],
             "mypackage.data.vendors.4053192739.6373932949",
@@ -447,7 +381,7 @@ def test_no_chains(self, large_graph, benchmark):
 
 
 def test_copy_graph(large_graph, benchmark):
-    _run_benchmark(benchmark, lambda: deepcopy(large_graph))
+    benchmark(lambda: deepcopy(large_graph))
 
 
 def test_modules_property_first_access(large_graph, benchmark):
@@ -459,7 +393,7 @@ def f():
         # Accessing the modules property is what we're benchmarking.
         _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_modules_property_many_accesses(large_graph, benchmark):
@@ -472,7 +406,7 @@ def f():
         for i in range(1000):
             _ = large_graph.modules
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_get_import_details(benchmark):
@@ -480,26 +414,26 @@ def test_get_import_details(benchmark):
     iterations = 100
     for i in range(iterations, 1):
         graph.add_import(
-            importer=f"blue_{i}", imported=f"green_{i}", line_contents="...", line_number=i
+            importer=f"blue_{i}",
+            imported=f"green_{i}",
+            line_contents="...",
+            line_number=i,
         )
 
     def f():
         for i in range(iterations):
             graph.get_import_details(importer=f"blue_{i}", imported=f"green_{i}")
 
-    _run_benchmark(benchmark, f)
+    benchmark(f)
 
 
 def test_find_matching_modules(benchmark, large_graph):
-    matching_modules = _run_benchmark(
-        benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**")
-    )
+    matching_modules = benchmark(lambda: large_graph.find_matching_modules("mypackage.domain.**"))
     assert len(matching_modules) == 2519
 
 
 def test_find_matching_direct_imports(benchmark, large_graph):
-    matching_imports = _run_benchmark(
-        benchmark,
+    matching_imports = benchmark(
         lambda: large_graph.find_matching_direct_imports(
             "mypackage.domain.** -> mypackage.data.**"
         ),

From 9c4b35aee69e7c50fc6003e493c997ecc817f75f Mon Sep 17 00:00:00 2001
From: David Seddon <david.seddon@octoenergy.com>
Date: Mon, 7 Apr 2025 17:09:18 +0100
Subject: [PATCH 2/2] Parallelize codspeed

Currently benchmarking takes much longer than the other jobs,
parallelizing it speeds up the time to completion.
---
 .github/workflows/main.yml | 8 ++++++--
 tox.ini                    | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 47806a8c..ce0b7b01 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -55,6 +55,10 @@ jobs:
 
   benchmarks:
       runs-on: ubuntu-22.04
+      strategy:
+        matrix:
+          shard: [1,2]
+
       steps:
         - uses: actions/checkout@v4
 
@@ -74,11 +78,11 @@ jobs:
           run: |
             python -VV
             uv venv
-            uv pip install pytest==7.4.4 pyyaml==6.0.1 pytest-codspeed==3.2.0 Django==5.1.1 /home/runner/work/grimp/grimp         
+            uv pip install pytest==7.4.4 pyyaml==6.0.1 pytest-codspeed==3.2.0 pytest-test-groups==1.2.0 Django==5.1.1 /home/runner/work/grimp/grimp         
 
         - name: Run benchmarks
           uses: CodSpeedHQ/action@v3
           with:
             token: ${{ secrets.CODSPEED_TOKEN }}
             run: |
-              uv run pytest tests/benchmarking/ --codspeed
+              uv run pytest tests/benchmarking/ --codspeed --test-group=${{ matrix.shard }} --test-group-by filename --test-group-count=2
diff --git a/tox.ini b/tox.ini
index 3bdb2cb9..4931d0c7 100644
--- a/tox.ini
+++ b/tox.ini
@@ -77,9 +77,10 @@ deps =
     pytest==7.4.4
     pyyaml==6.0.1
     pytest-codspeed==3.2.0
+    pytest-test-groups==1.2.0
     Django==5.1.1
 commands =
-    pytest --codspeed {posargs}
+    pytest --codspeed tests/benchmarking {posargs}
 
 
 [testenv:docs]