Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Changelog
Unreleased
----------

* Use joblib instead of multiprocessing for CPU parallelism. Fixes https://github.com/seddonym/grimp/issues/208.

3.8 (2025-04-11)
----------------

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ authors = [
]
requires-python = ">=3.9"
dependencies = [
"joblib>=1.3.0",
"typing-extensions>=3.10.0.0",
]
classifiers = [
Expand Down
34 changes: 15 additions & 19 deletions src/grimp/application/usecases.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
"""

from typing import Dict, Sequence, Set, Type, Union, cast, Iterable, Collection
import multiprocessing
import math

import joblib # type: ignore

from ..application.ports import caching
from ..application.ports.filesystem import AbstractFileSystem
from ..application.ports.graph import ImportGraph
Expand Down Expand Up @@ -228,19 +229,19 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi
module_files_tuple = tuple(module_files)

number_of_module_files = len(module_files_tuple)
n_chunks = _decide_number_of_of_processes(number_of_module_files)
n_chunks = _decide_number_of_processes(number_of_module_files)
chunk_size = math.ceil(number_of_module_files / n_chunks)

return tuple(
module_files_tuple[i * chunk_size : (i + 1) * chunk_size] for i in range(n_chunks)
)


def _decide_number_of_of_processes(number_of_module_files: int) -> int:
def _decide_number_of_processes(number_of_module_files: int) -> int:
if number_of_module_files < MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING:
# Don't incur the overhead of multiprocessing.
# Don't incur the overhead of multiple processes.
return 1
return min(multiprocessing.cpu_count(), number_of_module_files)
return min(joblib.cpu_count(), number_of_module_files)


def _scan_chunks(
Expand All @@ -257,20 +258,15 @@ def _scan_chunks(
)

number_of_processes = len(chunks)
if number_of_processes == 1:
# No need to spawn a process if there's only one chunk.
[chunk] = chunks
return _scan_chunk(import_scanner, exclude_type_checking_imports, chunk)
else:
with multiprocessing.Pool(number_of_processes) as pool:
imports_by_module_file: Dict[ModuleFile, Set[DirectImport]] = {}
import_scanning_jobs = pool.starmap(
_scan_chunk,
[(import_scanner, exclude_type_checking_imports, chunk) for chunk in chunks],
)
for chunk_imports_by_module_file in import_scanning_jobs:
imports_by_module_file.update(chunk_imports_by_module_file)
return imports_by_module_file
import_scanning_jobs = joblib.Parallel(n_jobs=number_of_processes)(
joblib.delayed(_scan_chunk)(import_scanner, exclude_type_checking_imports, chunk)
for chunk in chunks
)

imports_by_module_file = {}
for chunk_imports_by_module_file in import_scanning_jobs:
imports_by_module_file.update(chunk_imports_by_module_file)
return imports_by_module_file


def _scan_chunk(
Expand Down
34 changes: 23 additions & 11 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@ envlist =
check,
docs,
{py39,py310,py311,py312,py313},
py313-joblib-earliest,

[base]
deps =
pytest==7.4.4
pyyaml==6.0.1
pytest-cov==5.0.0
pytest-benchmark==4.0.0
# External packages to attempt to build the graph from.
Django==4.2.17 # N.B. Django 5 doesn't support Python 3.9.
flask==3.0.3
requests==2.32.3
sqlalchemy==2.0.35
google-cloud-audit-log==0.3.0

[testenv]
basepython =
Expand All @@ -12,6 +26,7 @@ basepython =
py311: {env:TOXPYTHON:python3.11}
py312: {env:TOXPYTHON:python3.12}
py313: {env:TOXPYTHON:python3.13}
py313-joblib-earliest: {env:TOXPYTHON:python3.13}
{clean,check,docs,report}: {env:TOXPYTHON:python3}
setenv =
PYTHONPATH={toxinidir}/tests
Expand All @@ -20,19 +35,16 @@ passenv =
*
usedevelop = false
deps =
pytest==7.4.4
pyyaml==6.0.1
pytest-cov==5.0.0
pytest-benchmark==4.0.0
# External packages to attempt to build the graph from.
Django==4.2.17 # N.B. Django 5 doesn't support Python 3.9.
flask==3.0.3
requests==2.32.3
sqlalchemy==2.0.35
google-cloud-audit-log==0.3.0
{[base]deps}
joblib==1.4.2
commands =
{posargs:pytest --cov --cov-report=term-missing --benchmark-skip -vv tests}

[testenv:py313-joblib-earliest]
deps =
{[base]deps}
joblib==1.3.0

[testenv:check]
basepython = py313
deps =
Expand Down Expand Up @@ -107,4 +119,4 @@ python =
3.10: py310, report
3.11: py311, report
3.12: py312, report
3.13: py313, report, check, docs
3.13: py313, py313-joblib-earliest, report, check, docs
Loading