From 4d701e89c9a15b3b2e505da307052d37f65dacfd Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Sat, 8 Feb 2025 06:29:41 +0100 Subject: [PATCH 1/4] Add line number and contents to large graph Else the copy benchmark isn't really fair --- tests/benchmarking/test_benchmarking.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/benchmarking/test_benchmarking.py b/tests/benchmarking/test_benchmarking.py index 4cedd334..b855f7d6 100644 --- a/tests/benchmarking/test_benchmarking.py +++ b/tests/benchmarking/test_benchmarking.py @@ -20,7 +20,12 @@ def large_graph(): for importer, importeds in graph_dict.items(): graph.add_module(importer) for imported in importeds: - graph.add_import(importer=importer, imported=imported) + graph.add_import( + importer=importer, + imported=imported, + line_number=1, + line_contents=f"import {imported}", + ) return graph From 669eb8de870af03ef23a3c23d65842490455c4d0 Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Fri, 7 Feb 2025 22:09:56 +0100 Subject: [PATCH 2/4] Intern strings for import details line contents I'd gone to all the trouble to remove strings from the graph in the module names, but then forgot about all the strings in the import details! This gives a signifcant boost to the performance of copying the graph. --- rust/src/graph/graph_manipulation.rs | 16 +++++++++++----- rust/src/graph/mod.rs | 17 ++++++++++++++--- rust/src/lib.rs | 2 +- 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/rust/src/graph/graph_manipulation.rs b/rust/src/graph/graph_manipulation.rs index 41428236..28680eae 100644 --- a/rust/src/graph/graph_manipulation.rs +++ b/rust/src/graph/graph_manipulation.rs @@ -1,4 +1,6 @@ -use crate::graph::{Graph, ImportDetails, Module, ModuleIterator, ModuleToken, MODULE_NAMES}; +use crate::graph::{ + Graph, ImportDetails, Module, ModuleIterator, ModuleToken, IMPORT_LINE_CONTENTS, MODULE_NAMES, +}; use rustc_hash::FxHashSet; use slotmap::secondary::Entry; @@ -129,10 +131,14 @@ impl Graph { .unwrap() .or_default() .insert(importer); - self.import_details - .entry((importer, imported)) - .or_default() - .insert(ImportDetails::new(line_number, line_contents.to_owned())); + { + let mut interner = IMPORT_LINE_CONTENTS.write().unwrap(); + let line_contents = interner.get_or_intern(line_contents); + self.import_details + .entry((importer, imported)) + .or_default() + .insert(ImportDetails::new(line_number, line_contents)); + } } pub fn remove_import(&mut self, importer: ModuleToken, imported: ModuleToken) { diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index cdbd5380..87b6a762 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -19,6 +19,8 @@ pub(crate) mod pathfinding; lazy_static! { static ref MODULE_NAMES: RwLock> = RwLock::new(StringInterner::default()); + static ref IMPORT_LINE_CONTENTS: RwLock> = + RwLock::new(StringInterner::default()); static ref EMPTY_MODULE_TOKENS: FxHashSet = FxHashSet::default(); static ref EMPTY_IMPORT_DETAILS: FxHashSet = FxHashSet::default(); static ref EMPTY_IMPORTS: FxHashSet<(ModuleToken, ModuleToken)> = FxHashSet::default(); @@ -131,7 +133,16 @@ pub struct ImportDetails { #[getset(get_copy = "pub")] line_number: usize, - #[new(into)] - #[getset(get = "pub")] - line_contents: String, + #[getset(get_copy = "pub")] + interned_line_contents: DefaultSymbol, +} + +impl ImportDetails { + pub fn line_contents(&self) -> String { + let interner = IMPORT_LINE_CONTENTS.read().unwrap(); + interner + .resolve(self.interned_line_contents) + .unwrap() + .to_owned() + } } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index d293f852..13dd24c5 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -237,7 +237,7 @@ impl GraphWrapper { importer.name_as_string(), imported.name_as_string(), import_details.line_number(), - import_details.line_contents().to_owned(), + import_details.line_contents(), ) }) .sorted() From 98fa5aca7ac6be9e09f260a9ed86cef1aa59714e Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Fri, 7 Feb 2025 22:16:14 +0100 Subject: [PATCH 3/4] Rename Module.name as interned_name --- rust/src/graph/graph_manipulation.rs | 2 +- rust/src/graph/mod.rs | 14 ++++----- rust/src/lib.rs | 47 ++++++++++------------------ 3 files changed, 25 insertions(+), 38 deletions(-) diff --git a/rust/src/graph/graph_manipulation.rs b/rust/src/graph/graph_manipulation.rs index 28680eae..aff238f1 100644 --- a/rust/src/graph/graph_manipulation.rs +++ b/rust/src/graph/graph_manipulation.rs @@ -33,7 +33,7 @@ impl Graph { } else { let module = self.modules.insert_with_key(|token| Module { token, - name, + interned_name: name, is_invisible: !ancestor_names.is_empty(), is_squashed: false, }); diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index 87b6a762..f57a1a83 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -34,7 +34,7 @@ pub struct Module { token: ModuleToken, #[getset(get_copy = "pub")] - name: DefaultSymbol, + interned_name: DefaultSymbol, // Invisible modules exist in the hierarchy but haven't been explicitly added to the graph. #[getset(get_copy = "pub")] @@ -45,9 +45,9 @@ pub struct Module { } impl Module { - pub fn name_as_string(&self) -> String { + pub fn name(&self) -> String { let interner = MODULE_NAMES.read().unwrap(); - interner.resolve(self.name).unwrap().to_owned() + interner.resolve(self.interned_name).unwrap().to_owned() } } @@ -104,13 +104,13 @@ pub trait ModuleIterator<'a>: Iterator + Sized { self.map(|m| m.token) } - fn names(self) -> impl Iterator { - self.map(|m| m.name) + fn interned_names(self) -> impl Iterator { + self.map(|m| m.interned_name) } - fn names_as_strings(self) -> impl Iterator { + fn names(self) -> impl Iterator { let interner = MODULE_NAMES.read().unwrap(); - self.map(move |m| interner.resolve(m.name).unwrap().to_owned()) + self.map(move |m| interner.resolve(m.interned_name).unwrap().to_owned()) } fn visible(self) -> impl ModuleIterator<'a> { diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 13dd24c5..5c5e0f4e 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -49,11 +49,7 @@ impl GraphWrapper { } pub fn get_modules(&self) -> HashSet { - self._graph - .all_modules() - .visible() - .names_as_strings() - .collect() + self._graph.all_modules().visible().names().collect() } pub fn contains_module(&self, name: &str) -> bool { @@ -156,7 +152,7 @@ impl GraphWrapper { ._graph .get_module_children(module.token()) .visible() - .names_as_strings() + .names() .collect()) } @@ -169,7 +165,7 @@ impl GraphWrapper { ._graph .get_module_descendants(module.token()) .visible() - .names_as_strings() + .names() .collect()) } @@ -195,7 +191,7 @@ impl GraphWrapper { .iter() .into_module_iterator(&self._graph) .visible() - .names_as_strings() + .names() .collect()) } @@ -207,7 +203,7 @@ impl GraphWrapper { .iter() .into_module_iterator(&self._graph) .visible() - .names_as_strings() + .names() .collect()) } @@ -234,8 +230,8 @@ impl GraphWrapper { .iter() .map(|import_details| { ImportDetails::new( - importer.name_as_string(), - imported.name_as_string(), + importer.name(), + imported.name(), import_details.line_number(), import_details.line_contents(), ) @@ -274,7 +270,7 @@ impl GraphWrapper { .iter() .into_module_iterator(&self._graph) .visible() - .names_as_strings() + .names() .collect()) } @@ -292,7 +288,7 @@ impl GraphWrapper { .iter() .into_module_iterator(&self._graph) .visible() - .names_as_strings() + .names() .collect()) } @@ -311,7 +307,7 @@ impl GraphWrapper { chain .iter() .into_module_iterator(&self._graph) - .names_as_strings() + .names() .collect() })) } @@ -348,7 +344,7 @@ impl GraphWrapper { chain .iter() .into_module_iterator(&self._graph) - .names_as_strings() + .names() .collect::>(), ) .unwrap() @@ -391,14 +387,8 @@ impl GraphWrapper { .into_iter() .map(|dep| { PackageDependency::new( - self._graph - .get_module(*dep.importer()) - .unwrap() - .name_as_string(), - self._graph - .get_module(*dep.imported()) - .unwrap() - .name_as_string(), + self._graph.get_module(*dep.importer()).unwrap().name(), + self._graph.get_module(*dep.imported()).unwrap().name(), dep.routes() .iter() .map(|route| { @@ -406,17 +396,17 @@ impl GraphWrapper { route .heads() .iter() - .map(|m| self._graph.get_module(*m).unwrap().name_as_string()) + .map(|m| self._graph.get_module(*m).unwrap().name()) .collect(), route .middle() .iter() - .map(|m| self._graph.get_module(*m).unwrap().name_as_string()) + .map(|m| self._graph.get_module(*m).unwrap().name()) .collect(), route .tails() .iter() - .map(|m| self._graph.get_module(*m).unwrap().name_as_string()) + .map(|m| self._graph.get_module(*m).unwrap().name()) .collect(), ) }) @@ -460,10 +450,7 @@ impl GraphWrapper { ) -> Vec> { let containers = match containers.is_empty() { true => vec![None], - false => containers - .iter() - .map(|c| Some(c.name_as_string())) - .collect(), + false => containers.iter().map(|c| Some(c.name())).collect(), }; let mut levels_by_container: Vec> = vec![]; From ae3ea3dfee243a97c2d5359f2f0edf75e7c8f440 Mon Sep 17 00:00:00 2001 From: Peter Byfield Date: Fri, 7 Feb 2025 22:25:30 +0100 Subject: [PATCH 4/4] Use u32 for import details line number This should be enough! --- rust/src/graph/graph_manipulation.rs | 2 +- rust/src/graph/mod.rs | 2 +- rust/src/lib.rs | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/src/graph/graph_manipulation.rs b/rust/src/graph/graph_manipulation.rs index aff238f1..caed21c1 100644 --- a/rust/src/graph/graph_manipulation.rs +++ b/rust/src/graph/graph_manipulation.rs @@ -118,7 +118,7 @@ impl Graph { &mut self, importer: ModuleToken, imported: ModuleToken, - line_number: usize, + line_number: u32, line_contents: &str, ) { self.imports diff --git a/rust/src/graph/mod.rs b/rust/src/graph/mod.rs index f57a1a83..8a97b2bd 100644 --- a/rust/src/graph/mod.rs +++ b/rust/src/graph/mod.rs @@ -131,7 +131,7 @@ impl<'a, T: Iterator> ModuleTokenIterator<'a> for T {} #[derive(Debug, Clone, PartialEq, Eq, Hash, new, Getters, CopyGetters)] pub struct ImportDetails { #[getset(get_copy = "pub")] - line_number: usize, + line_number: u32, #[getset(get_copy = "pub")] interned_line_contents: DefaultSymbol, diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 5c5e0f4e..ff8a0588 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -111,7 +111,7 @@ impl GraphWrapper { &mut self, importer: &str, imported: &str, - line_number: Option, + line_number: Option, line_contents: Option<&str>, ) { let importer = self._graph.get_or_add_module(importer).token(); @@ -540,7 +540,7 @@ impl GraphWrapper { struct ImportDetails { importer: String, imported: String, - line_number: usize, + line_number: u32, line_contents: String, }