Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

138 changes: 138 additions & 0 deletions rust/src/graph/cycles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
TODO K4liber: description

Finds all cycles using Johnson's algorithm.
Copy link
Collaborator

@Peter554 Peter554 Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 What's the advantage here of Johnson's algorithm over just using the existing pathfinding function with from_modules set equal to to_modules? E.g. why not find_shortest_path_bidirectional(&graph, &start_modules, &start_modules, ...) - I think that should work? Adding a new algorithm like this adds quite some complexity that needs to be maintained - especially since your algorithm basically transforms the graph to build an entirely new data structure (_DirectedGraph).

Copy link
Collaborator

@Peter554 Peter554 Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

E.g. why not find_shortest_path_bidirectional(&graph, &start_modules, &start_modules, ...) - I think that should work?

I tried this out, seems to work -> #194

*/
use std::collections::{HashSet, HashMap};

use crate::graph::Graph;

struct _DirectedGraph {
adj_list: HashMap<String, Vec<String>>,
}

impl _DirectedGraph {

fn new() -> Self {
_DirectedGraph {
adj_list: HashMap::new()
}
}

fn add_edge(&mut self, u: &str, v: &str) {
self.adj_list
.entry(u.to_string())
.or_insert_with(Vec::new)
.push(v.to_string());
}

fn find_cycles(&self) -> Vec<Vec<String>> {
let mut cycles = Vec::new();
let mut blocked = HashSet::new();
let mut stack = Vec::new();
let mut b_sets: HashMap<String, HashSet<String>> = HashMap::new();

for start in self.adj_list.keys() {
let mut subgraph = self.build_subgraph(start);
self.find_cycles_from(
start,
start,
&mut blocked,
&mut stack,
&mut b_sets,
&mut cycles,
&mut subgraph,
);
}

cycles
}

fn build_subgraph(&self, start: &String) -> HashMap<String, Vec<String>> {
self.adj_list
.iter()
.filter(|(node, _)| node >= &start)
.map(|(node, neighbors)| {
let filtered_neighbors: Vec<String> =
neighbors.iter().filter(|n| n >= &start).cloned().collect();
(node.clone(), filtered_neighbors)
})
.collect()
}

fn find_cycles_from(
&self,
start: &String,
v: &String,
blocked: &mut HashSet<String>,
stack: &mut Vec<String>,
b_sets: &mut HashMap<String, HashSet<String>>,
cycles: &mut Vec<Vec<String>>,
subgraph: &mut HashMap<String, Vec<String>>,
) -> bool {
let mut found_cycle = false;
stack.push(v.clone());
blocked.insert(v.clone());

let neighbors = subgraph.get(v).cloned().unwrap_or_default();
for w in neighbors {
if &w == start {
cycles.push(stack.clone());
found_cycle = true;
} else if !blocked.contains(&w) {
if self.find_cycles_from(start, &w, blocked, stack, b_sets, cycles, subgraph) {
found_cycle = true;
}
}
}

if found_cycle {
self.unblock(v, blocked, b_sets);
} else {
if let Some(neighbors) = subgraph.get(v) {
for w in neighbors {
b_sets.entry(w.clone()).or_default().insert(v.clone());
}
}
}

stack.pop();
found_cycle
}

fn unblock(&self, node: &String, blocked: &mut HashSet<String>, b_sets: &mut HashMap<String, HashSet<String>>) {
blocked.remove(node);
let mut stack = vec![node.clone()];

while let Some(n) = stack.pop() {
if let Some(dependent_nodes) = b_sets.remove(&n) {
for w in dependent_nodes {
if blocked.remove(&w) {
stack.push(w);
}
}
}
}
}
}

impl Graph {
pub fn find_cycles(
&self
) -> Vec<Vec<String>> {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 I'm not sure we want to work with String here . Inside Graph I've designed the code to work in terms of ModuleTokens. We generally translate back to strings just for the python layer (i.e. in GraphWrapper in lib.rs).

let mut directed_graph = _DirectedGraph::new();

for module in self.all_modules() {
let module_token = module.token();
let imports_modules_tokens = self.imports.get(module_token).unwrap().iter();

for next_module in imports_modules_tokens {
let imported_module = self.get_module(*next_module);
directed_graph.add_edge(module.name().as_str(), imported_module.unwrap().name().as_str());
}
}

let cycles = directed_graph.find_cycles();
return cycles;
}
}
1 change: 1 addition & 0 deletions rust/src/graph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub mod graph_manipulation;
pub mod hierarchy_queries;
pub mod higher_order_queries;
pub mod import_chain_queries;
pub mod cycles;

pub(crate) mod pathfinding;

Expand Down
6 changes: 6 additions & 0 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,12 @@ impl GraphWrapper {
self.convert_package_dependencies_to_python(py, illegal_dependencies)
}

pub fn find_cycles(
&self
) -> PyResult<Vec<Vec<String>>> {
Comment on lines +423 to +425
Copy link
Collaborator

@Peter554 Peter554 Feb 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 To find all cycles in the entire graph like this sounds like it could be very computationally expensive for large graphs. Should we instead just give one start module/package to work from? e.g. find_shortest_cycle(&self, module: &str, as_package: bool) -> PyResult <Option<Vec<String>>> might be a more reasonable API.

Ok(self._graph.find_cycles())
}

pub fn clone(&self) -> GraphWrapper {
GraphWrapper {
_graph: self._graph.clone(),
Expand Down
28 changes: 28 additions & 0 deletions rust/tests/test_cycles.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use _rustgrimp::graph::Graph;

#[test]
fn test_find_cycles() {
// Given
let mut graph = Graph::default();
let dependencies: Vec<Vec<String>> = vec![
vec!["A".to_string(), "B".to_string()],
vec!["B".to_string(), "C".to_string()],
vec!["C".to_string(), "A".to_string()],
];

for modules in dependencies {
let importer_name = &modules[0];
let imported_name = &modules[1];
let importer = graph.get_or_add_module(importer_name).token();
let imported = graph.get_or_add_module(imported_name).token();
graph.add_import(importer, imported)
}

let expected_cycles: Vec<Vec<String>> = vec![
vec!["A".to_string(), "B".to_string(), "C".to_string()],
];
// When
let cycles = graph.find_cycles();
// Then
assert_eq!(cycles, expected_cycles);
}
6 changes: 6 additions & 0 deletions src/grimp/adaptors/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,12 @@ def find_illegal_dependencies_for_layers(

return _dependencies_from_tuple(result)

def find_cycles(
self
) -> list[list[str]]:
result: list[list[str]] = self._rustgraph.find_cycles()
return result

# Dunder methods
# --------------

Expand Down
9 changes: 9 additions & 0 deletions src/grimp/application/ports/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,15 @@ def find_illegal_dependencies_for_layers(
"""
raise NotImplementedError

@abc.abstractmethod
def find_cycles(self) -> list[list[str]]:
"""
TODO K4liber:
- docs

"""
raise NotImplementedError

def __repr__(self) -> str:
"""
Display the instance in one of the following ways:
Expand Down
26 changes: 26 additions & 0 deletions tests/unit/test_cycles.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
TODO K4liber:
- remove this notes
- add more test cases (real-life libraries?)

To rebuild the rust binary run the following command from the root directory:

python -m pip install -e .

"""

from grimp.adaptors.graph import ImportGraph


class TestFindCycles:

def test_empty_graph(self) -> None:
graph = ImportGraph()
graph.add_module("A")
graph.add_module("B")
graph.add_module("C")
graph.add_import(importer="A", imported="B")
graph.add_import(importer="B", imported="C")
graph.add_import(importer="C", imported="A")
cycles = graph.find_cycles()
assert cycles == [["A", "B", "C"]]
Loading