diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c9600b81..7d6edd0a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,8 @@ Changelog Unreleased ---------- +* Use joblib instead of multiprocessing for CPU parallelism. Fixes https://github.com/seddonym/grimp/issues/208. + 3.8 (2025-04-11) ---------------- diff --git a/pyproject.toml b/pyproject.toml index f8da8904..9db5ae1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ authors = [ ] requires-python = ">=3.9" dependencies = [ + "joblib>=1.3.0", "typing-extensions>=3.10.0.0", ] classifiers = [ diff --git a/src/grimp/application/usecases.py b/src/grimp/application/usecases.py index e19bb6e8..606befd1 100644 --- a/src/grimp/application/usecases.py +++ b/src/grimp/application/usecases.py @@ -3,9 +3,10 @@ """ from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection -import multiprocessing import math +import joblib # type: ignore + from ..application.ports import caching from ..application.ports.filesystem import AbstractFileSystem from ..application.ports.graph import ImportGraph @@ -228,7 +229,7 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi module_files_tuple = tuple(module_files) number_of_module_files = len(module_files_tuple) - n_chunks = _decide_number_of_of_processes(number_of_module_files) + n_chunks = _decide_number_of_processes(number_of_module_files) chunk_size = math.ceil(number_of_module_files / n_chunks) return tuple( @@ -236,11 +237,11 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi ) -def _decide_number_of_of_processes(number_of_module_files: int) -> int: +def _decide_number_of_processes(number_of_module_files: int) -> int: if number_of_module_files < MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING: - # Don't incur the overhead of multiprocessing. + # Don't incur the overhead of multiple processes. 
return 1 - return min(multiprocessing.cpu_count(), number_of_module_files) + return min(joblib.cpu_count(), number_of_module_files) def _scan_chunks( @@ -257,20 +258,15 @@ def _scan_chunks( ) number_of_processes = len(chunks) - if number_of_processes == 1: - # No need to spawn a process if there's only one chunk. - [chunk] = chunks - return _scan_chunk(import_scanner, exclude_type_checking_imports, chunk) - else: - with multiprocessing.Pool(number_of_processes) as pool: - imports_by_module_file: Dict[ModuleFile, Set[DirectImport]] = {} - import_scanning_jobs = pool.starmap( - _scan_chunk, - [(import_scanner, exclude_type_checking_imports, chunk) for chunk in chunks], - ) - for chunk_imports_by_module_file in import_scanning_jobs: - imports_by_module_file.update(chunk_imports_by_module_file) - return imports_by_module_file + import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)( + joblib.delayed(_scan_chunk)(import_scanner, exclude_type_checking_imports, chunk) + for chunk in chunks + ) + + imports_by_module_file = {} + for chunk_imports_by_module_file in import_scanning_jobs: + imports_by_module_file.update(chunk_imports_by_module_file) + return imports_by_module_file def _scan_chunk( diff --git a/tox.ini b/tox.ini index 3bdb2cb9..3027d938 100644 --- a/tox.ini +++ b/tox.ini @@ -4,6 +4,20 @@ envlist = check, docs, {py39,py310,py311,py312,py13}, + py313-joblib-earliest, + +[base] +deps = + pytest==7.4.4 + pyyaml==6.0.1 + pytest-cov==5.0.0 + pytest-benchmark==4.0.0 + # External packages to attempt to build the graph from. + Django==4.2.17 # N.B. Django 5 doesn't support Python 3.9. 
+ flask==3.0.3 + requests==2.32.3 + sqlalchemy==2.0.35 + google-cloud-audit-log==0.3.0 [testenv] basepython = @@ -12,6 +26,7 @@ basepython = py311: {env:TOXPYTHON:python3.11} py312: {env:TOXPYTHON:python3.12} py313: {env:TOXPYTHON:python3.13} + py313-joblib-earliest: {env:TOXPYTHON:python3.13} {clean,check,docs,report}: {env:TOXPYTHON:python3} setenv = PYTHONPATH={toxinidir}/tests @@ -20,19 +35,16 @@ passenv = * usedevelop = false deps = - pytest==7.4.4 - pyyaml==6.0.1 - pytest-cov==5.0.0 - pytest-benchmark==4.0.0 - # External packages to attempt to build the graph from. - Django==4.2.17 # N.B. Django 5 doesn't support Python 3.9. - flask==3.0.3 - requests==2.32.3 - sqlalchemy==2.0.35 - google-cloud-audit-log==0.3.0 + {[base]deps} + joblib==1.4.2 commands = {posargs:pytest --cov --cov-report=term-missing --benchmark-skip -vv tests} +[testenv:py313-joblib-earliest] +deps = + {[base]deps} + joblib==1.3.0 + [testenv:check] basepython = py313 deps = @@ -107,4 +119,4 @@ python = 3.10: py310, report 3.11: py311, report 3.12: py312, report - 3.13: py313, report, check, docs + 3.13: py313, py313-joblib-earliest, report, check, docs