From ac95e4b5e6bbadc941f1fb070b6a8f060ddca97f Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Wed, 12 Feb 2025 05:18:03 +0100 Subject: [PATCH 1/5] Add ModuleExpression --- docs/usage.rst | 20 ++++ rust/Cargo.lock | 83 +++++++++++++- rust/Cargo.toml | 2 + rust/src/errors.rs | 8 +- rust/src/exceptions.rs | 5 + rust/src/lib.rs | 7 +- rust/src/module_expressions.rs | 192 +++++++++++++++++++++++++++++++++ src/grimp/exceptions.py | 4 + 8 files changed, 316 insertions(+), 5 deletions(-) create mode 100644 rust/src/module_expressions.rs diff --git a/docs/usage.rst b/docs/usage.rst index ba336efa..f549e5a0 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -517,5 +517,25 @@ Methods for manipulating the graph :param str module: The name of a module, for example ``'mypackage.foo'``. :return: bool +.. _module_expressions: + +Module expressions +------------------ + + A module expression is used to refer to sets of modules. + + - ``*`` stands in for a module name, without including subpackages. + - ``**`` includes subpackages too. + + Examples: + + - ``mypackage.foo``: matches ``mypackage.foo`` exactly. + - ``mypackage.*``: matches ``mypackage.foo`` but not ``mypackage.foo.bar``. + - ``mypackage.*.baz``: matches ``mypackage.foo.baz`` but not ``mypackage.foo.bar.baz``. + - ``mypackage.*.*``: matches ``mypackage.foo.bar`` and ``mypackage.foobar.baz``. + - ``mypackage.**``: matches ``mypackage.foo.bar`` and ``mypackage.foo.bar.baz``. + - ``mypackage.**.qux``: matches ``mypackage.foo.bar.qux`` and ``mypackage.foo.bar.baz.qux``. + - ``mypackage.foo*``: is not a valid expression. (The wildcard must replace a whole module name.) + .. _namespace packages: https://docs.python.org/3/glossary.html#term-namespace-package .. _namespace portion: https://docs.python.org/3/glossary.html#term-portion \ No newline at end of file diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 05270e54..9414ee01 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -9,11 +9,13 @@ dependencies = [ "bimap", "derive-new", "getset", - "indexmap", + "indexmap 2.7.1", "itertools", "lazy_static", + "parameterized", "pyo3", "rayon", + "regex", "rustc-hash", "serde_json", "slotmap", @@ -22,6 +24,15 @@ dependencies = [ "thiserror", ] +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + [[package]] name = "autocfg" version = "1.4.0" @@ -106,6 +117,12 @@ dependencies = [ "syn", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hashbrown" version = "0.15.2" @@ -121,6 +138,16 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + [[package]] name = "indexmap" version = "2.7.1" @@ -128,7 +155,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.15.2", ] [[package]] @@ -185,6 +212,27 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "parameterized" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194bf497674dda552d4f1bd24d325f828f425876c9d522fcb1810cd527e0bd4e" +dependencies = [ + "parameterized-macro", +] + +[[package]] +name = "parameterized-macro" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1374bca5edab7a317c4ffbc9df1e239ceb7dcf5426b6b403474408442a9777ac" +dependencies = [ + "indexmap 1.9.3", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "portable-atomic" version = "1.10.0" @@ -314,6 +362,35 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "regex" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + [[package]] name = "rustc-hash" version = "2.1.0" @@ -373,7 +450,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a3275464d7a9f2d4cac57c89c2ef96a8524dba2864c8d6f82e3980baf136f9b" dependencies = [ - "hashbrown", + "hashbrown 0.15.2", "serde", ] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 048d453e..6fe24041 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -20,6 +20,7 @@ itertools = "0.14.0" tap = "1.0.1" rustc-hash = "2.1.0" indexmap = "2.7.1" +regex = "1.11.1" [dependencies.pyo3] version = "0.23.4" @@ -29,4 +30,5 @@ extension-module = ["pyo3/extension-module"] default = ["extension-module"] [dev-dependencies] +parameterized = "2.0.0" serde_json = "1.0.137" diff --git a/rust/src/errors.rs b/rust/src/errors.rs index e1ac1ef9..29228eb1 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -1,4 +1,4 @@ -use crate::exceptions::{ModuleNotPresent, NoSuchContainer}; +use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; use pyo3::exceptions::PyValueError; use pyo3::PyErr; use thiserror::Error; @@ -13,6 +13,9 @@ pub enum GrimpError { #[error("Modules have shared descendants.")] SharedDescendants, + + #[error("{0} is not a valid module expression.")] + InvalidModuleExpression(String), } pub type GrimpResult = Result; @@ -24,6 +27,9 @@ impl From for PyErr { GrimpError::ModuleNotPresent(_) => ModuleNotPresent::new_err(value.to_string()), GrimpError::NoSuchContainer(_) => NoSuchContainer::new_err(value.to_string()), GrimpError::SharedDescendants => PyValueError::new_err(value.to_string()), + GrimpError::InvalidModuleExpression(_) => { + InvalidModuleExpression::new_err(value.to_string()) + } } } } diff --git a/rust/src/exceptions.rs b/rust/src/exceptions.rs index a8aa3e74..833cd0dd 100644 --- a/rust/src/exceptions.rs +++ b/rust/src/exceptions.rs @@ -2,3 +2,8 @@ use pyo3::create_exception; create_exception!(_rustgrimp, ModuleNotPresent, pyo3::exceptions::PyException); create_exception!(_rustgrimp, NoSuchContainer, pyo3::exceptions::PyException); +create_exception!( + _rustgrimp, + InvalidModuleExpression, + pyo3::exceptions::PyException +); diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 49a97a5b..c288d70a 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,9 +1,10 @@ pub mod errors; pub mod exceptions; pub mod graph; +pub mod module_expressions; use crate::errors::{GrimpError, GrimpResult}; -use crate::exceptions::{ModuleNotPresent, NoSuchContainer}; +use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; use derive_new::new; @@ -21,6 +22,10 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; + m.add( + "InvalidModuleExpression", + py.get_type::(), + )?; Ok(()) } diff --git a/rust/src/module_expressions.rs b/rust/src/module_expressions.rs new file mode 100644 index 00000000..51e5d2d4 --- /dev/null +++ b/rust/src/module_expressions.rs @@ -0,0 +1,192 @@ +use crate::errors::{GrimpError, GrimpResult}; +use itertools::Itertools; +use lazy_static::lazy_static; +use regex::Regex; +use std::fmt::Display; +use std::str::FromStr; + +lazy_static! { + static ref MODULE_EXPRESSION_PATTERN: Regex = + Regex::new(r"^(\w+|\*{1,2})(\.(\w+|\*{1,2}))*$").unwrap(); +} + +/// A module expression is used to refer to sets of modules. +/// +/// - `*` stands in for a module name, without including subpackages. +/// - `**` includes subpackages too. +#[derive(Debug, Clone)] +pub struct ModuleExpression { + expression: String, + pattern: Regex, +} + +impl Display for ModuleExpression { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.expression) + } +} + +impl AsRef for ModuleExpression { + fn as_ref(&self) -> &str { + &self.expression + } +} + +impl FromStr for ModuleExpression { + type Err = GrimpError; + + fn from_str(expression: &str) -> Result { + if !MODULE_EXPRESSION_PATTERN.is_match(expression) { + return Err(GrimpError::InvalidModuleExpression(expression.to_owned())); + } + + for (part, next_part) in expression.split(".").tuple_windows() { + match (part, next_part) { + ("*", "**") | ("**", "*") | ("**", "**") => { + return Err(GrimpError::InvalidModuleExpression(expression.to_owned())) + } + _ => {} + } + } + + Ok(Self { + expression: expression.to_owned(), + pattern: Self::create_pattern(expression)?, + }) + } +} + +impl ModuleExpression { + pub fn is_match(&self, module_name: &str) -> bool { + self.pattern.is_match(module_name) + } + + fn create_pattern(expression: &str) -> GrimpResult { + let mut pattern_parts = vec![]; + + for part in expression.split(".") { + if part == "*" { + pattern_parts.push(part.replace("*", r"[^\.]+")) + } else if part == "**" { + pattern_parts.push(part.replace("**", r"[^\.]+(?:\.[^\.]+)*?")) + } else { + pattern_parts.push(part.to_owned()); + } + } + + Regex::new(&(r"^".to_owned() + &pattern_parts.join(r".") + r"$")) + .map_err(|_| GrimpError::InvalidModuleExpression(expression.to_owned())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use derive_new::new; + use parameterized::parameterized; + + #[derive(Debug, new)] + struct NewModuleExpressionTestCase { + #[new(into)] + expression: String, + expect_valid: bool, + } + + #[parameterized(case = { + // Valid + NewModuleExpressionTestCase::new(r"foo", true), + NewModuleExpressionTestCase::new(r"foo_bar_123", true), + NewModuleExpressionTestCase::new(r"foo.bar", true), + NewModuleExpressionTestCase::new(r"foo.*", true), + NewModuleExpressionTestCase::new(r"foo.**", true), + NewModuleExpressionTestCase::new(r"foo.*.bar", true), + NewModuleExpressionTestCase::new(r"foo.**.bar", true), + NewModuleExpressionTestCase::new(r"*.foo", true), + NewModuleExpressionTestCase::new(r"**.foo", true), + NewModuleExpressionTestCase::new(r"foo.*.bar.**", true), + NewModuleExpressionTestCase::new(r"foo.**.bar.*", true), + NewModuleExpressionTestCase::new(r"foo.*.*.bar", true), + // Invalid + NewModuleExpressionTestCase::new(r"foo.bar*", false), + NewModuleExpressionTestCase::new(r".foo", false), + NewModuleExpressionTestCase::new(r"foo.", false), + NewModuleExpressionTestCase::new(r"foo..bar", false), + NewModuleExpressionTestCase::new(r"foo.***", false), + NewModuleExpressionTestCase::new(r"foo ", false), + NewModuleExpressionTestCase::new(r"foo .bar", false), + NewModuleExpressionTestCase::new(r"foo. *.bar", false), + NewModuleExpressionTestCase::new(r"foo.*.**.bar", false), + NewModuleExpressionTestCase::new(r"foo.**.*.bar", false), + NewModuleExpressionTestCase::new(r"foo.**.**.bar", false), + })] + fn test_parse(case: NewModuleExpressionTestCase) -> GrimpResult<()> { + let module_expression = case.expression.parse::(); + if case.expect_valid { + assert!(module_expression.is_ok()); + } else { + assert!(module_expression.is_err()); + assert!(matches!( + module_expression.unwrap_err(), + GrimpError::InvalidModuleExpression(_) + )) + } + Ok(()) + } + + #[derive(Debug, new)] + struct MatchesModuleNameTestCase { + #[new(into)] + expression: String, + #[new(into)] + module_name: String, + expect_match: bool, + } + + #[parameterized(case = { + // Exact match. + MatchesModuleNameTestCase::new(r"foo", "foo", true), + MatchesModuleNameTestCase::new(r"foo", "bar", false), + // Exact match with dot. + MatchesModuleNameTestCase::new(r"foo.bar", "foo.bar", true), + MatchesModuleNameTestCase::new(r"foo.bar", "foo.baz", false), + // Single wildcard at end + MatchesModuleNameTestCase::new(r"foo.*", "foo.bar", true), + MatchesModuleNameTestCase::new(r"foo.*", "foo", false), + MatchesModuleNameTestCase::new(r"foo.*", "foo.bar.baz", false), + // Double wildcard at end + MatchesModuleNameTestCase::new(r"foo.**", "foo.bar", true), + MatchesModuleNameTestCase::new(r"foo.**", "foo", false), + MatchesModuleNameTestCase::new(r"foo.**", "foo.bar.baz", true), + // Single wildcard in the middle + MatchesModuleNameTestCase::new(r"foo.*.baz", "foo.bar.baz", true), + MatchesModuleNameTestCase::new(r"foo.*.baz", "foo.bar.bax.baz", false), + // Double wildcard in the middle + MatchesModuleNameTestCase::new(r"foo.**.baz", "foo.bar.baz", true), + MatchesModuleNameTestCase::new(r"foo.**.baz", "foo.bar.bax.baz", true), + // Single wildcard at start + MatchesModuleNameTestCase::new(r"*.foo", "bar.foo", true), + MatchesModuleNameTestCase::new(r"*.foo", "foo", false), + MatchesModuleNameTestCase::new(r"*.foo", "bar.baz.foo", false), + // Double wildcard at start + MatchesModuleNameTestCase::new(r"**.foo", "bar.foo", true), + MatchesModuleNameTestCase::new(r"**.foo", "foo", false), + MatchesModuleNameTestCase::new(r"**.foo", "bar.baz.foo", true), + // Multiple single wildcards + MatchesModuleNameTestCase::new(r"foo.*.*.bar", "foo.a.b.bar", true), + MatchesModuleNameTestCase::new(r"foo.*.*.bar", "foo.a.bar", false), + MatchesModuleNameTestCase::new(r"foo.*.*.bar", "foo.a.b.c.bar", false), + // Mixing single and double wildcards + MatchesModuleNameTestCase::new(r"foo.**.bar.*", "foo.a.bar.b", true), + MatchesModuleNameTestCase::new(r"foo.**.bar.*", "foo.a.b.bar.c", true), + MatchesModuleNameTestCase::new(r"foo.**.bar.*", "foo.bar", false), + MatchesModuleNameTestCase::new(r"foo.**.bar.*", "foo.a.bar.b.c", false), + })] + fn test_is_match(case: MatchesModuleNameTestCase) -> GrimpResult<()> { + let module_expression: ModuleExpression = case.expression.parse()?; + assert_eq!( + module_expression.is_match(&case.module_name), + case.expect_match + ); + Ok(()) + } +} diff --git a/src/grimp/exceptions.py b/src/grimp/exceptions.py index 56e4e8c1..bea4b98b 100644 --- a/src/grimp/exceptions.py +++ b/src/grimp/exceptions.py @@ -61,3 +61,7 @@ def __eq__(self, other): other.lineno, other.text, ) + + +class InvalidModuleExpression(GrimpException): + pass From 16b9e0c88b987fab614b16517a2cdef2d9f8c8bb Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Thu, 13 Feb 2025 21:42:53 +0100 Subject: [PATCH 2/5] Add find_matching_modules to Graph --- docs/usage.rst | 10 ++++ rust/src/graph/hierarchy_queries.rs | 60 +++++++++++++++++++-- rust/src/lib.rs | 11 ++++ rust/src/module_expressions.rs | 2 +- src/grimp/adaptors/graph.py | 8 ++- src/grimp/application/ports/graph.py | 34 ++++++++++++ tests/benchmarking/test_benchmarking.py | 7 +++ tests/unit/adaptors/graph/test_hierarchy.py | 34 +++++++++++- 8 files changed, 159 insertions(+), 7 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index f549e5a0..b1b53b33 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -121,6 +121,16 @@ Methods for analysing the module tree :raises: ``ValueError`` if the module is a squashed module, as by definition it represents both itself and all of its descendants. +.. py:function:: ImportGraph.find_matching_modules(expression) + + Find all modules matching the passed expression (see :ref:`module_expressions`). + + :param str expression: A module expression used for matching. + :return: A set of module names matching the expression. + :rtype: A set of strings. + :raises: ``grimp.exceptions.InvalidModuleExpression`` if the module expression is invalid. + + Methods for analysing direct imports ------------------------------------ diff --git a/rust/src/graph/hierarchy_queries.rs b/rust/src/graph/hierarchy_queries.rs index 41cdf570..dacacf0b 100644 --- a/rust/src/graph/hierarchy_queries.rs +++ b/rust/src/graph/hierarchy_queries.rs @@ -1,4 +1,6 @@ -use crate::graph::{Graph, Module, ModuleToken, MODULE_NAMES}; +use crate::graph::{Graph, Module, ModuleIterator, ModuleToken, MODULE_NAMES}; +use crate::module_expressions::ModuleExpression; +use rustc_hash::FxHashSet; impl Graph { pub fn get_module_by_name(&self, name: &str) -> Option<&Module> { @@ -15,7 +17,7 @@ impl Graph { } // TODO(peter) Guarantee order? - pub fn all_modules(&self) -> impl Iterator { + pub fn all_modules(&self) -> impl ModuleIterator { self.modules.values() } @@ -26,7 +28,7 @@ impl Graph { } } - pub fn get_module_children(&self, module: ModuleToken) -> impl Iterator { + pub fn get_module_children(&self, module: ModuleToken) -> impl ModuleIterator { let children = match self.module_children.get(module) { Some(children) => children .iter() @@ -40,11 +42,61 @@ impl Graph { /// Returns an iterator over the passed modules descendants. /// /// Parent modules will be yielded before their child modules. - pub fn get_module_descendants(&self, module: ModuleToken) -> impl Iterator { + pub fn get_module_descendants(&self, module: ModuleToken) -> impl ModuleIterator { let mut descendants = self.get_module_children(module).collect::>(); for child in descendants.clone() { descendants.extend(self.get_module_descendants(child.token).collect::>()) } descendants.into_iter() } + + pub fn find_matching_modules(&self, expression: &ModuleExpression) -> impl ModuleIterator { + let interner = MODULE_NAMES.read().unwrap(); + let modules: FxHashSet<_> = self + .modules + .values() + .filter(|m| expression.is_match(interner.resolve(m.interned_name).unwrap())) + .collect(); + modules.into_iter() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::graph::ModuleIterator; + use derive_new::new; + use parameterized::parameterized; + + #[derive(Debug, new)] + struct FindMatchingModulesTestCase<'a> { + expression: &'a str, + #[new(into)] + expected_matching_modules: Vec<&'a str>, + } + + #[parameterized(case = { + FindMatchingModulesTestCase::new("foo", ["foo"]), + FindMatchingModulesTestCase::new("foo.*", ["foo.bar"]), + FindMatchingModulesTestCase::new("foo.**", ["foo.bar", "foo.bar.baz"]), + })] + fn test_find_matching_modules(case: FindMatchingModulesTestCase) { + let mut graph = Graph::default(); + graph.get_or_add_module("foo"); + graph.get_or_add_module("foo.bar"); + graph.get_or_add_module("foo.bar.baz"); + + let expression = case.expression.parse().unwrap(); + + let matching_modules: FxHashSet<_> = + graph.find_matching_modules(&expression).tokens().collect(); + + assert_eq!( + matching_modules, + case.expected_matching_modules + .into_iter() + .map(|module| graph.get_module_by_name(module).unwrap().token) + .collect() + ); + } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index c288d70a..ef238439 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -7,6 +7,7 @@ use crate::errors::{GrimpError, GrimpResult}; use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; +use crate::module_expressions::ModuleExpression; use derive_new::new; use itertools::Itertools; use pyo3::exceptions::PyValueError; @@ -174,6 +175,16 @@ impl GraphWrapper { .collect()) } + pub fn find_matching_modules(&self, expression: &str) -> PyResult> { + let expression: ModuleExpression = expression.parse()?; + Ok(self + ._graph + .find_matching_modules(&expression) + .visible() + .names() + .collect()) + } + #[pyo3(signature = (*, importer, imported, as_packages = false))] pub fn direct_import_exists( &self, diff --git a/rust/src/module_expressions.rs b/rust/src/module_expressions.rs index 51e5d2d4..00b675fe 100644 --- a/rust/src/module_expressions.rs +++ b/rust/src/module_expressions.rs @@ -11,7 +11,7 @@ lazy_static! { } /// A module expression is used to refer to sets of modules. -/// +/// /// - `*` stands in for a module name, without including subpackages. /// - `**` includes subpackages too. #[derive(Debug, Clone)] diff --git a/src/grimp/adaptors/graph.py b/src/grimp/adaptors/graph.py index 4730ef45..8df52576 100644 --- a/src/grimp/adaptors/graph.py +++ b/src/grimp/adaptors/graph.py @@ -4,7 +4,7 @@ from grimp.domain.analysis import PackageDependency, Route from grimp.domain.valueobjects import Layer from grimp import _rustgrimp as rust # type: ignore[attr-defined] -from grimp.exceptions import ModuleNotPresent, NoSuchContainer +from grimp.exceptions import ModuleNotPresent, NoSuchContainer, InvalidModuleExpression from grimp.application.ports import graph @@ -24,6 +24,12 @@ def modules(self) -> Set[str]: self._cached_modules = self._rustgraph.get_modules() return self._cached_modules + def find_matching_modules(self, expression: str) -> Set[str]: + try: + return self._rustgraph.find_matching_modules(expression) + except rust.InvalidModuleExpression as e: + raise InvalidModuleExpression(str(e)) from e + def add_module(self, module: str, is_squashed: bool = False) -> None: self._cached_modules = None self._rustgraph.add_module(module, is_squashed) diff --git a/src/grimp/application/ports/graph.py b/src/grimp/application/ports/graph.py index acf39a84..0b33235c 100644 --- a/src/grimp/application/ports/graph.py +++ b/src/grimp/application/ports/graph.py @@ -32,6 +32,40 @@ def modules(self) -> Set[str]: """ raise NotImplementedError + @abc.abstractmethod + def find_matching_modules(self, expression: str) -> Set[str]: + """ + Find all modules matching the passed expression. + + Args: + expression: A module expression used for matching. + Returns: + A set of module names matching the expression. + Raises: + InvalidModuleExpression if the passed expression is invalid. + + Module Expressions + ================== + + A module expression is used to refer to sets of modules. + + - ``*`` stands in for a module name, without including subpackages. + - ``**`` includes subpackages too. + + Examples + -------- + + - ``mypackage.foo``: matches ``mypackage.foo`` exactly. + - ``mypackage.*``: matches ``mypackage.foo`` but not ``mypackage.foo.bar``. + - ``mypackage.*.baz``: matches ``mypackage.foo.baz`` but not ``mypackage.foo.bar.baz``. + - ``mypackage.*.*``: matches ``mypackage.foo.bar`` and ``mypackage.foobar.baz``. + - ``mypackage.**``: matches ``mypackage.foo.bar`` and ``mypackage.foo.bar.baz``. + - ``mypackage.**.qux``: matches ``mypackage.foo.bar.qux`` and ``mypackage.foo.bar.baz.qux``. + - ``mypackage.foo*``: is not a valid expression. (The wildcard must replace a whole module + name.) + """ + raise NotImplementedError + @abc.abstractmethod def add_module(self, module: str, is_squashed: bool = False) -> None: """ diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py index b855f7d6..509de55b 100644 --- a/tests/benchmarking/test_benchmarking.py +++ b/tests/benchmarking/test_benchmarking.py @@ -462,3 +462,10 @@ def f(): graph.get_import_details(importer=f"blue_{i}", imported=f"green_{i}") _run_benchmark(benchmark, f) + + +def test_find_matching_modules(benchmark, large_graph): + matching_modules = _run_benchmark( + benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**") + ) + assert len(matching_modules) == 2519 diff --git a/tests/unit/adaptors/graph/test_hierarchy.py b/tests/unit/adaptors/graph/test_hierarchy.py index 8053b40e..7612e3f8 100644 --- a/tests/unit/adaptors/graph/test_hierarchy.py +++ b/tests/unit/adaptors/graph/test_hierarchy.py @@ -1,7 +1,7 @@ import pytest # type: ignore from grimp.adaptors.graph import ImportGraph -from grimp.exceptions import ModuleNotPresent +from grimp.exceptions import ModuleNotPresent, InvalidModuleExpression @pytest.mark.parametrize( @@ -115,3 +115,35 @@ def test_find_descendants_works_if_modules_added_in_different_order(): "mypackage.foo.blue.alpha", "mypackage.foo.blue.alpha.one", } + + +class TestFindMatchingModules: + @pytest.mark.parametrize( + "expression, expected_matching_modules", + [ + ["foo", {"foo"}], + ["foo.*", {"foo.bar"}], + ["foo.**", {"foo.bar", "foo.bar.baz"}], + ], + ) + def test_finds_matching_modules(self, expression, expected_matching_modules): + graph = ImportGraph() + graph.add_module("foo") + graph.add_module("foo.bar") + graph.add_module("foo.bar.baz") + assert graph.find_matching_modules(expression) == expected_matching_modules + + def test_does_not_return_invisible_modules(self): + graph = ImportGraph() + # "foo" and "foo.bar" will be invisible modules in the graph. + graph.add_module("foo.bar.baz") + + # "foo.bar" is not returned. + assert graph.find_matching_modules("foo.**") == {"foo.bar.baz"} + + def test_raises_error_if_expression_is_invalid(self): + graph = ImportGraph() + with pytest.raises( + InvalidModuleExpression, match="foo.. is not a valid module expression." + ): + graph.find_matching_modules("foo..") From 88f8cf4acc4dc849ed6475633bc2fdadec416300 Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Fri, 14 Feb 2025 00:05:30 +0100 Subject: [PATCH 3/5] Add find_matching_direct_imports to graph --- docs/usage.rst | 21 ++++ rust/src/graph/direct_import_queries.rs | 71 +++++++++++++- rust/src/lib.rs | 41 ++++++++ src/grimp/__init__.py | 3 +- src/grimp/adaptors/graph.py | 12 ++- src/grimp/application/ports/graph.py | 34 ++++++- tests/benchmarking/test_benchmarking.py | 10 ++ .../adaptors/graph/test_direct_imports.py | 98 +++++++++++++++++++ 8 files changed, 286 insertions(+), 4 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index b1b53b33..6dc2c14b 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -194,6 +194,27 @@ Methods for analysing direct imports So if a module is imported twice from the same module, it will only be counted once. :rtype: Integer. +.. py:function:: ImportGraph.find_matching_direct_imports(importer_expression, imported_expression) + + Find all direct imports matching the passed expressions (see :ref:`module_expressions`). + + The imports are returned are in the following form:: + + [ + { + 'importer': 'mypackage.importer', + 'imported': 'mypackage.imported', + }, + # (additional imports here) + ] + + :param str importer_expression: A module expression used for matching importing modules. + :param str imported_expression: A module expression used for matching imported modules. + :return: An ordered list of direct imports matching the expressions (ordered alphabetically). + :rtype: List of dictionaries with the structure shown above. If you want to use type annotations, you may use the + ``grimp.Import`` TypedDict for each dictionary. + :raises: ``grimp.exceptions.InvalidModuleExpression`` if either of the module expressions is invalid. + Methods for analysing import chains ----------------------------------- diff --git a/rust/src/graph/direct_import_queries.rs b/rust/src/graph/direct_import_queries.rs index 2023e94d..ff4ccfa3 100644 --- a/rust/src/graph/direct_import_queries.rs +++ b/rust/src/graph/direct_import_queries.rs @@ -1,8 +1,9 @@ use crate::errors::{GrimpError, GrimpResult}; use crate::graph::{ ExtendWithDescendants, Graph, ImportDetails, ModuleToken, EMPTY_IMPORT_DETAILS, - EMPTY_MODULE_TOKENS, + EMPTY_MODULE_TOKENS, MODULE_NAMES, }; +use crate::module_expressions::ModuleExpression; use rustc_hash::FxHashSet; impl Graph { @@ -54,4 +55,72 @@ impl Graph { None => &EMPTY_IMPORT_DETAILS, } } + + pub fn find_matching_direct_imports( + &self, + importer_expression: &ModuleExpression, + imported_expression: &ModuleExpression, + ) -> FxHashSet<(ModuleToken, ModuleToken)> { + let interner = MODULE_NAMES.read().unwrap(); + self.imports + .iter() + .flat_map(|(importer, imports)| { + imports + .iter() + .cloned() + .map(move |imported| (importer, imported)) + }) + .filter(|(importer, imported)| { + let importer = self.get_module(*importer).unwrap(); + let importer_name = interner.resolve(importer.interned_name).unwrap(); + let imported = self.get_module(*imported).unwrap(); + let imported_name = interner.resolve(imported.interned_name).unwrap(); + importer_expression.is_match(importer_name) + && imported_expression.is_match(imported_name) + }) + .collect() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_find_matching_direct_imports() { + let mut graph = Graph::default(); + + let _ = graph.get_or_add_module("pkg.animals").token; + let dog = graph.get_or_add_module("pkg.animals.dog").token; + let cat = graph.get_or_add_module("pkg.animals.cat").token; + let _ = graph.get_or_add_module("pkg.food").token; + let chicken = graph.get_or_add_module("pkg.food.chicken").token; + let fish = graph.get_or_add_module("pkg.food.fish").token; + let _ = graph.get_or_add_module("pkg.colors").token; + let golden = graph.get_or_add_module("pkg.colors.golden").token; + let ginger = graph.get_or_add_module("pkg.colors.ginger").token; + let _ = graph.get_or_add_module("pkg.shops").token; + let tesco = graph.get_or_add_module("pkg.shops.tesco").token; + let coop = graph.get_or_add_module("pkg.shops.coop").token; + + // Should match + graph.add_import(dog, chicken); + graph.add_import(cat, fish); + // Should not match: Imported does not match + graph.add_import(dog, golden); + graph.add_import(cat, ginger); + // Should not match: Importer does not match + graph.add_import(tesco, chicken); + graph.add_import(coop, fish); + + let importer_expression = "pkg.animals.*".parse().unwrap(); + let imported_expression = "pkg.food.*".parse().unwrap(); + let matching_imports = + graph.find_matching_direct_imports(&importer_expression, &imported_expression); + + assert_eq!( + matching_imports, + FxHashSet::from_iter([(dog, chicken), (cat, fish)]) + ); + } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ef238439..b4506b0d 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -272,6 +272,41 @@ impl GraphWrapper { ) } + #[pyo3(signature = (*, importer_expression, imported_expression))] + pub fn find_matching_direct_imports<'py>( + &self, + py: Python<'py>, + importer_expression: &str, + imported_expression: &str, + ) -> PyResult> { + let importer_expression: ModuleExpression = importer_expression.parse()?; + let imported_expression: ModuleExpression = imported_expression.parse()?; + + let matching_imports = self + ._graph + .find_matching_direct_imports(&importer_expression, &imported_expression); + + PyList::new( + py, + matching_imports + .into_iter() + .map(|(importer, imported)| { + let importer = self._graph.get_module(importer).unwrap(); + let imported = self._graph.get_module(imported).unwrap(); + Import::new(importer.name(), imported.name()) + }) + .sorted() + .map(|import| { + [ + ("importer", import.importer.into_py_any(py).unwrap()), + ("imported", import.imported.into_py_any(py).unwrap()), + ] + .into_py_dict(py) + .unwrap() + }), + ) + } + #[allow(unused_variables)] #[pyo3(signature = (module, as_package=false))] pub fn find_downstream_modules( @@ -553,6 +588,12 @@ impl GraphWrapper { } } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, new)] +struct Import { + importer: String, + imported: String, +} + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, new)] struct ImportDetails { importer: String, diff --git a/src/grimp/__init__.py b/src/grimp/__init__.py index 7b9fc48a..160822d1 100644 --- a/src/grimp/__init__.py +++ b/src/grimp/__init__.py @@ -1,6 +1,6 @@ __version__ = "3.6" -from .application.ports.graph import DetailedImport, ImportGraph +from .application.ports.graph import DetailedImport, ImportGraph, Import from .domain.analysis import PackageDependency, Route from .domain.valueobjects import DirectImport, Module, Layer from .main import build_graph @@ -9,6 +9,7 @@ "Module", "DetailedImport", "DirectImport", + "Import", "ImportGraph", "PackageDependency", "Route", diff --git a/src/grimp/adaptors/graph.py b/src/grimp/adaptors/graph.py index 8df52576..3f749f88 100644 --- a/src/grimp/adaptors/graph.py +++ b/src/grimp/adaptors/graph.py @@ -1,6 +1,6 @@ from __future__ import annotations from typing import List, Optional, Sequence, Set, Tuple, TypedDict -from grimp.application.ports.graph import DetailedImport +from grimp.application.ports.graph import DetailedImport, Import from grimp.domain.analysis import PackageDependency, Route from grimp.domain.valueobjects import Layer from grimp import _rustgrimp as rust # type: ignore[attr-defined] @@ -108,6 +108,16 @@ def get_import_details(self, *, importer: str, imported: str) -> List[DetailedIm imported=imported, ) + def find_matching_direct_imports( + self, *, importer_expression: str, imported_expression: str + ) -> List[Import]: + try: + return self._rustgraph.find_matching_direct_imports( + importer_expression=importer_expression, imported_expression=imported_expression + ) + except rust.InvalidModuleExpression as e: + raise InvalidModuleExpression(str(e)) from e + def find_downstream_modules(self, module: str, as_package: bool = False) -> Set[str]: return self._rustgraph.find_downstream_modules(module, as_package) diff --git a/src/grimp/application/ports/graph.py b/src/grimp/application/ports/graph.py index 0b33235c..8bc2588d 100644 --- a/src/grimp/application/ports/graph.py +++ b/src/grimp/application/ports/graph.py @@ -9,9 +9,12 @@ from grimp.domain.valueobjects import Layer -class DetailedImport(TypedDict): +class Import(TypedDict): importer: str imported: str + + +class DetailedImport(Import): line_number: int line_contents: str @@ -211,6 +214,35 @@ def get_import_details(self, *, importer: str, imported: str) -> List[DetailedIm """ raise NotImplementedError + @abc.abstractmethod + def find_matching_direct_imports( + self, *, importer_expression: str, imported_expression: str + ) -> List[Import]: + """ + Find all direct imports matching the passed expressions. + + The imports are returned are in the following form: + + [ + { + 'importer': 'mypackage.importer', + 'imported': 'mypackage.imported', + }, + ... + ] + + Args: + importer_expression: A module expression used for matching importing modules. + imported_expression: A module expression used for matching imported modules. + Returns: + An ordered list of direct imports matching the expressions (ordered alphabetically). + Raises: + InvalidModuleExpression if either of the passed expressions are invalid. + + See `ImportGraph.find_matching_modules` for a description of module expressions. + """ + raise NotImplementedError + # Indirect imports # ---------------- diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py index 509de55b..b57d66f7 100644 --- a/tests/benchmarking/test_benchmarking.py +++ b/tests/benchmarking/test_benchmarking.py @@ -469,3 +469,13 @@ def test_find_matching_modules(benchmark, large_graph): benchmark, lambda: large_graph.find_matching_modules("mypackage.domain.**") ) assert len(matching_modules) == 2519 + + +def test_find_matching_direct_imports(benchmark, large_graph): + matching_imports = _run_benchmark( + benchmark, + lambda: large_graph.find_matching_direct_imports( + importer_expression="mypackage.domain.**", imported_expression="mypackage.data.**" + ), + ) + assert len(matching_imports) == 4051 diff --git a/tests/unit/adaptors/graph/test_direct_imports.py b/tests/unit/adaptors/graph/test_direct_imports.py index e64593e0..96870a46 100644 --- a/tests/unit/adaptors/graph/test_direct_imports.py +++ b/tests/unit/adaptors/graph/test_direct_imports.py @@ -1,6 +1,7 @@ import pytest # type: ignore from grimp.adaptors.graph import ImportGraph +from grimp.exceptions import InvalidModuleExpression def test_find_modules_directly_imported_by(): @@ -265,3 +266,100 @@ def test_returns_only_relevant_imports(self): assert imports_info == graph.get_import_details( importer="mypackage.foo", imported="mypackage.bar" ) + + +class TestFindMatchingDirectImports: + @pytest.mark.parametrize( + "import_line_number,import_line_contents", + [ + [None, None], + [1, "..."], + ], + ) + def test_finds_matching_direct_imports(self, import_line_number, import_line_contents): + graph = ImportGraph() + # Should match + graph.add_import( + importer="pkg.animals.dog", + imported="pkg.food.chicken", + line_number=import_line_number, + line_contents=import_line_contents, + ) + graph.add_import( + importer="pkg.animals.cat", + imported="pkg.food.fish", + line_number=import_line_number, + line_contents=import_line_contents, + ) + # Should not match: Imported does not match + graph.add_import( + importer="pkg.animals.dog", + imported="pkg.colors.golden", + line_number=import_line_number, + line_contents=import_line_contents, + ) + graph.add_import( + importer="pkg.animals.cat", + imported="pkg.colors.ginger", + line_number=import_line_number, + line_contents=import_line_contents, + ) + # Should not match: Importer does not match + graph.add_import( + importer="pkg.shops.tesco", + imported="pkg.food.chicken", + line_number=import_line_number, + line_contents=import_line_contents, + ) + graph.add_import( + importer="pkg.shops.coop", + imported="pkg.food.fish", + line_number=import_line_number, + line_contents=import_line_contents, + ) + + assert graph.find_matching_direct_imports( + importer_expression="pkg.animals.*", imported_expression="pkg.food.*" + ) == [ + {"importer": "pkg.animals.cat", "imported": "pkg.food.fish"}, + {"importer": "pkg.animals.dog", "imported": "pkg.food.chicken"}, + ] + + def test_deduplicates_imports(self): + graph = ImportGraph() + graph.add_import( + importer="pkg.animals.dog", + imported="pkg.colors.golden", + line_number=1, + line_contents="...1", + ) + graph.add_import( + importer="pkg.animals.dog", + imported="pkg.colors.golden", + line_number=2, + line_contents="...2", + ) + + assert graph.find_matching_direct_imports( + importer_expression="pkg.animals.*", imported_expression="pkg.colors.*" + ) == [ + {"importer": "pkg.animals.dog", "imported": "pkg.colors.golden"}, + ] + + def test_raises_error_if_importer_expression_is_invalid(self): + graph = ImportGraph() + with pytest.raises( + InvalidModuleExpression, match="foo.. is not a valid module expression." + ): + graph.find_matching_direct_imports( + importer_expression="foo..", imported_expression="bar" + ) + + def test_raises_error_if_imported_expression_is_invalid(self): + graph = ImportGraph() + with pytest.raises( + InvalidModuleExpression, match="bar.. is not a valid module expression." + ): + graph.find_matching_direct_imports( + importer_expression="foo", imported_expression="bar.." + ) From 6f0022f51ec5509e3b2205e2edab7067c31e3b4b Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Fri, 21 Feb 2025 22:48:45 +0100 Subject: [PATCH 4/5] Update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f1b2f40c..4ce213a2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ Changelog Unreleased ---------- +* Added basic functions for working with module expressions: `find_matching_modules` and `find_matching_direct_imports`. * Added `as_packages` option to the `find_shortest_chain` method. 3.6 (2025-02-07) From eb43939f60ae833b00def4906a9d705b1a5adbf0 Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Sat, 22 Feb 2025 07:42:51 +0100 Subject: [PATCH 5/5] Rename find_shortest_path Remove the "bidirectional" from the function name, since this is the only path finding function we have. The "bidirectional" part is just an implementation detail. --- rust/src/graph/import_chain_queries.rs | 4 ++-- rust/src/graph/pathfinding.rs | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/rust/src/graph/import_chain_queries.rs b/rust/src/graph/import_chain_queries.rs index 4f21da48..3a753a60 100644 --- a/rust/src/graph/import_chain_queries.rs +++ b/rust/src/graph/import_chain_queries.rs @@ -1,5 +1,5 @@ use crate::errors::GrimpResult; -use crate::graph::pathfinding::{find_reach, find_shortest_path_bidirectional}; +use crate::graph::pathfinding::{find_reach, find_shortest_path}; use crate::graph::{ExtendWithDescendants, Graph, ModuleToken}; use itertools::Itertools; use rustc_hash::{FxHashMap, FxHashSet}; @@ -62,7 +62,7 @@ impl Graph { excluded_modules: &FxHashSet, excluded_imports: &FxHashMap>, ) -> GrimpResult>> { - find_shortest_path_bidirectional( + find_shortest_path( self, from_modules, to_modules, diff --git a/rust/src/graph/pathfinding.rs b/rust/src/graph/pathfinding.rs index 55e9450d..742eae77 100644 --- a/rust/src/graph/pathfinding.rs +++ b/rust/src/graph/pathfinding.rs @@ -28,7 +28,8 @@ pub fn find_reach( &seen.into_iter().collect::>() - from_modules } -pub fn find_shortest_path_bidirectional( +/// Finds the shortest path, via a bidirectional BFS. +pub fn find_shortest_path( graph: &Graph, from_modules: &FxHashSet, to_modules: &FxHashSet,