Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
Changelog
=========

latest
------

* Provide more control of multiprocessing via ``GRIMP_MIN_MULTIPROCESSING_MODULES``
environment variable.

3.8.1 (2025-04-23)
------------------

Expand Down
7 changes: 6 additions & 1 deletion docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,12 @@ Building the graph
:param str, optional cache_dir: The directory to use for caching the graph. Defaults to ``.grimp_cache``. To disable caching,
pass ``None``. See :doc:`caching`.
:return: An import graph that you can use to analyse the package.
:rtype: ImportGraph
:rtype: ``ImportGraph``

This function uses multiple operating system processes to build the graph when the number of modules to scan (not
including modules in the cache) is 50 or more. This threshold can be adjusted by setting the ``GRIMP_MIN_MULTIPROCESSING_MODULES``
environment variable to a different integer. To disable multiprocessing altogether, set it to a large number (more than
the number of modules in the codebase being analyzed).

.. _typing module documentation: https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING

Expand Down
15 changes: 13 additions & 2 deletions src/grimp/application/usecases.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,19 @@
from ..application.ports.packagefinder import AbstractPackageFinder
from ..domain.valueobjects import DirectImport, Module
from .config import settings
import os


class NotSupplied:
    # Empty marker class: it defines no attributes or behaviour of its own.
    # NOTE(review): presumably used as a sentinel default to tell "argument not passed"
    # apart from an explicit ``None`` - confirm against the callers in this module.
    pass


# Name of the environment variable that calling code can set if it wants to tune when to
# switch to multiprocessing, or set to a large number to disable it altogether.
# The variable is looked up (and converted with int()) inside _decide_number_of_processes,
# i.e. each time the number of processes is decided rather than once at import time.
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME = "GRIMP_MIN_MULTIPROCESSING_MODULES"
# This is an arbitrary number, but setting it too low slows down our functional tests considerably.
# NOTE(review): this looks like the previous name of the default defined below - confirm
# whether anything still needs it.
MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50
# Threshold used when the environment variable above is not set.
# If you change this, update docs/usage.rst too!
DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING = 50


def build_graph(
Expand Down Expand Up @@ -238,7 +243,13 @@ def _create_chunks(module_files: Collection[ModuleFile]) -> tuple[tuple[ModuleFi


def _decide_number_of_processes(number_of_module_files: int) -> int:
    """
    Return how many operating system processes to use for scanning module files.

    The threshold is read from the environment variable named by
    ``MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME`` every time this
    function is called, falling back to
    ``DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING`` when it is not set.

    Args:
        number_of_module_files: the number of module files that need scanning.

    Returns:
        1 if there are fewer module files than the threshold, otherwise the smaller of
        ``joblib.cpu_count()`` and ``number_of_module_files``.

    Raises:
        ValueError: if the environment variable is set to something that cannot be
            parsed as an integer.
    """
    raw_threshold = os.environ.get(
        MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME,
        DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
    )
    try:
        min_number_of_modules = int(raw_threshold)
    except ValueError as e:
        # A bare "invalid literal for int()" gives the user no clue which setting is
        # wrong, so name the environment variable and the offending value.
        raise ValueError(
            f"{MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME} must be an "
            f"integer, got {raw_threshold!r}."
        ) from e

    if number_of_module_files < min_number_of_modules:
        # Don't incur the overhead of multiple processes.
        return 1
    # Never start more processes than there are files to scan.
    return min(joblib.cpu_count(), number_of_module_files)
Expand Down
16 changes: 16 additions & 0 deletions tests/adaptors/modulefinder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from grimp.application.ports.modulefinder import AbstractModuleFinder, FoundPackage, ModuleFile
from grimp.application.ports.filesystem import AbstractFileSystem
from typing import FrozenSet, Dict


class BaseFakeModuleFinder(AbstractModuleFinder):
    """
    Module finder for tests that serves module files from an in-memory mapping.

    Subclasses override ``module_files_by_package_name`` with the module files that
    should be reported for each package name.
    """

    # Maps a package name to the module files reported for that package.
    module_files_by_package_name: Dict[str, FrozenSet[ModuleFile]] = {}

    def find_package(
        self, package_name: str, package_directory: str, file_system: AbstractFileSystem
    ) -> FoundPackage:
        """
        Build a FoundPackage from the configured mapping.

        ``file_system`` is accepted to satisfy the signature but is not consulted.
        Looking up a package name that is missing from the mapping raises ``KeyError``.
        """
        configured_module_files = self.module_files_by_package_name[package_name]
        return FoundPackage(
            name=package_name,
            directory=package_directory,
            module_files=configured_module_files,
        )
3 changes: 2 additions & 1 deletion tests/functional/test_build_and_use_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from unittest.mock import patch
from grimp.application import usecases


"""
For ease of reference, these are the imports of all the files:

Expand Down Expand Up @@ -55,7 +56,7 @@ def test_modules():
}


@patch.object(usecases, "MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
@patch.object(usecases, "DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING", 0)
def test_modules_multiprocessing():
"""
This test runs relatively slowly, but it's important we cover the multiprocessing code.
Expand Down
79 changes: 78 additions & 1 deletion tests/unit/application/test_usecases.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from typing import Dict, Optional, Set
from unittest.mock import sentinel
from unittest.mock import sentinel, patch

import joblib # type: ignore
import pytest # type: ignore

from grimp.application import usecases
Expand All @@ -9,8 +11,11 @@
from grimp.domain.valueobjects import DirectImport, Module
from tests.adaptors.filesystem import FakeFileSystem
from tests.adaptors.packagefinder import BaseFakePackageFinder
from tests.adaptors.modulefinder import BaseFakeModuleFinder
from tests.config import override_settings

SOME_CPU_COUNT = 8


class TestBuildGraph:
@pytest.mark.parametrize("include_external_packages", (True, False))
Expand Down Expand Up @@ -131,3 +136,75 @@ def write(
if supplied_cache_dir is not sentinel.not_supplied:
kwargs["cache_dir"] = supplied_cache_dir
usecases.build_graph("mypackage", **kwargs)

@patch.object(usecases, "_scan_chunks", return_value={})
@patch.object(joblib, "cpu_count", return_value=SOME_CPU_COUNT)
@pytest.mark.parametrize(
    "number_of_modules, fake_environ, expected_number_of_chunks",
    [
        # Environment variable not set: the default threshold applies.
        (
            usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING - 1,
            {},
            1,
        ),
        (
            usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING,
            {},
            SOME_CPU_COUNT,
        ),
        (
            usecases.DEFAULT_MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING + 1,
            {},
            SOME_CPU_COUNT,
        ),
        # Environment variable set: its value overrides the default threshold.
        # Real os.environ values are always strings, so the fake uses a string too;
        # this way the test also covers parsing the value into an integer.
        (
            149,
            {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: "150"},
            1,
        ),
        (
            150,
            {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: "150"},
            SOME_CPU_COUNT,
        ),
        (
            151,
            {usecases.MIN_NUMBER_OF_MODULES_TO_SCAN_USING_MULTIPROCESSING_ENV_NAME: "150"},
            SOME_CPU_COUNT,
        ),
    ],
)
def test_scanning_multiprocessing_respects_min_number_of_modules(
    self,
    mock_cpu_count,
    mock_scan_chunks,
    number_of_modules,
    fake_environ,
    expected_number_of_chunks,
):
    """
    Check the number of chunks passed to ``_scan_chunks`` either side of the threshold.

    Below the threshold a single chunk is expected; at or above it, one chunk per
    (mocked) CPU. The threshold comes from the default constant when the fake
    environment is empty, and from the environment variable otherwise.
    """

    class FakePackageFinder(BaseFakePackageFinder):
        directory_map = {"mypackage": "/path/to/mypackage"}

    class FakeModuleFinder(BaseFakeModuleFinder):
        module_files_by_package_name = {
            "mypackage": frozenset(
                {
                    ModuleFile(
                        module=Module(f"mypackage.mod_{i}"),
                        mtime=999,
                    )
                    for i in range(number_of_modules)
                }
            )
        }

    # os.environ is swapped out wholesale so that a value set in the real environment
    # of the machine running the tests cannot leak into the "not set" cases.
    with override_settings(
        FILE_SYSTEM=FakeFileSystem(),
        PACKAGE_FINDER=FakePackageFinder(),
        MODULE_FINDER=FakeModuleFinder(),
    ), patch.object(os, "environ", fake_environ):
        usecases.build_graph("mypackage", cache_dir=None)

    # Scanning should have been requested exactly once; its first positional argument
    # is the collection of chunks.
    [call] = mock_scan_chunks.call_args_list
    chunks = call.args[0]
    assert len(chunks) == expected_number_of_chunks
Loading