diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7ae982e5..62b48b4c 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,11 @@ Changelog ========= +latest +------ + +* Use Rust instead of Python's built-in ast module for import parsing. + 3.8.2 (2025-04-24) ------------------ diff --git a/rust/Cargo.lock b/rust/Cargo.lock index ff5b889c..f4ae21c1 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -11,13 +11,16 @@ dependencies = [ "derive-new", "getset", "indexmap 2.9.0", - "itertools", + "itertools 0.14.0", "lazy_static", "parameterized", "pyo3", "rayon", "regex", - "rustc-hash", + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "rustc-hash 2.1.1", "serde_json", "slotmap", "string-interner", @@ -46,6 +49,23 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" +[[package]] +name = "bitflags" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" + +[[package]] +name = "bstr" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -126,6 +146,26 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "getset" version = "0.1.5" @@ -185,6 +225,27 @@ version = "2.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +[[package]] +name = "is-macro" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -212,6 +273,12 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +[[package]] +name = "log" +version = "0.4.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" + [[package]] name = "memchr" version = "2.7.4" @@ -254,12 +321,59 @@ dependencies = [ "syn", ] +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "portable-atomic" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -363,6 +477,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "rayon" version = "1.10.0" @@ -412,6 +556,72 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" +[[package]] +name = "ruff_python_ast" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?tag=v0.4.10#b54922fd7394c36cdc390fd21aaee99206ebc361" +dependencies = [ + "aho-corasick", + "bitflags", + "is-macro", + "itertools 0.13.0", + "once_cell", + "ruff_python_trivia", + "ruff_source_file", + "ruff_text_size", + "rustc-hash 1.1.0", +] + +[[package]] +name = "ruff_python_parser" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?tag=v0.4.10#b54922fd7394c36cdc390fd21aaee99206ebc361" +dependencies = [ + "bitflags", + "bstr", + "memchr", + "ruff_python_ast", + "ruff_python_trivia", + "ruff_text_size", + "rustc-hash 1.1.0", + "static_assertions", + "unicode-ident", + "unicode-normalization", + "unicode_names2", +] + +[[package]] +name = "ruff_python_trivia" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?tag=v0.4.10#b54922fd7394c36cdc390fd21aaee99206ebc361" +dependencies = [ + "itertools 0.13.0", + "ruff_source_file", + "ruff_text_size", + "unicode-ident", +] + +[[package]] +name = "ruff_source_file" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?tag=v0.4.10#b54922fd7394c36cdc390fd21aaee99206ebc361" +dependencies = [ + "memchr", + "once_cell", + "ruff_text_size", +] + +[[package]] +name = "ruff_text_size" +version = "0.0.0" +source = "git+https://github.com/astral-sh/ruff.git?tag=v0.4.10#b54922fd7394c36cdc390fd21aaee99206ebc361" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc-hash" version = "2.1.1" @@ -456,6 +666,12 @@ dependencies = [ "serde", ] +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slotmap" version = "1.0.7" @@ -465,6 +681,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "string-interner" version = "0.18.0" @@ -518,18 +740,70 @@ dependencies = [ "syn", ] +[[package]] +name = "tinyvec" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-xid" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_names2" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" +dependencies = [ + "phf", + "unicode_names2_generator", +] + +[[package]] +name = "unicode_names2_generator" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" +dependencies = [ + "getopts", + "log", + "phf_codegen", + "rand", +] + [[package]] name = "unindent" version = "0.2.4" @@ -541,3 +815,29 @@ name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "zerocopy" +version = "0.8.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 925e1f36..8e5acab2 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -22,6 +22,9 @@ rustc-hash = "2.1.0" indexmap = "2.7.1" regex = "1.11.1" const_format = "0.2.34" +ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } +ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } +ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" } [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/errors.rs b/rust/src/errors.rs index c73ec97c..a4849481 100644 --- a/rust/src/errors.rs +++ b/rust/src/errors.rs @@ -1,6 +1,7 @@ -use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; +use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError}; use pyo3::PyErr; use pyo3::exceptions::PyValueError; +use ruff_python_parser::ParseError as RuffParseError; use thiserror::Error; #[derive(Debug, Error)] @@ -16,6 +17,14 @@ pub enum GrimpError { #[error("{0} is not a valid module expression.")] InvalidModuleExpression(String), + + #[error("Error parsing python code (line {line_number}, text {text}).")] + ParseError { + line_number: usize, + text: String, + #[source] + parse_error: RuffParseError, + }, } pub type GrimpResult = Result; @@ -30,6 +39,9 @@ impl From for PyErr { GrimpError::InvalidModuleExpression(_) => { InvalidModuleExpression::new_err(value.to_string()) } + GrimpError::ParseError { + line_number, text, .. + } => PyErr::new::((line_number, text)), } } } diff --git a/rust/src/exceptions.rs b/rust/src/exceptions.rs index 833cd0dd..f25a78cd 100644 --- a/rust/src/exceptions.rs +++ b/rust/src/exceptions.rs @@ -1,9 +1,28 @@ use pyo3::create_exception; +use pyo3::exceptions::PyException; +use pyo3::prelude::*; -create_exception!(_rustgrimp, ModuleNotPresent, pyo3::exceptions::PyException); -create_exception!(_rustgrimp, NoSuchContainer, pyo3::exceptions::PyException); -create_exception!( - _rustgrimp, - InvalidModuleExpression, - pyo3::exceptions::PyException -); +create_exception!(_rustgrimp, ModuleNotPresent, PyException); +create_exception!(_rustgrimp, NoSuchContainer, PyException); +create_exception!(_rustgrimp, InvalidModuleExpression, PyException); + +// We need to use here `pyclass(extends=PyException)` instead of `create_exception!` +// since the exception contains custom data. See: +// * https://github.com/PyO3/pyo3/issues/2597 +// * https://github.com/PyO3/pyo3/issues/295 +// * https://github.com/PyO3/pyo3/discussions/3259 +#[pyclass(extends=PyException)] +pub struct ParseError { + #[pyo3(get)] + pub line_number: usize, + #[pyo3(get)] + pub text: String, +} + +#[pymethods] +impl ParseError { + #[new] + fn new(line_number: usize, text: String) -> Self { + Self { line_number, text } + } +} diff --git a/rust/src/import_parsing.rs b/rust/src/import_parsing.rs new file mode 100644 index 00000000..5fbd9768 --- /dev/null +++ b/rust/src/import_parsing.rs @@ -0,0 +1,606 @@ +use crate::errors::{GrimpError, GrimpResult}; +use ruff_python_ast::statement_visitor::{StatementVisitor, walk_body, walk_stmt}; +use ruff_python_ast::{Expr, Stmt}; +use ruff_python_parser::parse_module; +use ruff_source_file::{LineIndex, SourceCode}; +use std::fs; +use std::path::Path; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct ImportedObject { + pub name: String, + pub line_number: usize, + pub line_contents: String, + pub typechecking_only: bool, +} + +impl ImportedObject { + fn new( + name: String, + line_number: usize, + line_contents: String, + typechecking_only: bool, + ) -> Self { + Self { + name, + line_number, + line_contents, + typechecking_only, + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone)] +struct ImportedObjectWithoutLineContents { + pub name: String, + pub line_number: usize, + pub typechecking_only: bool, +} + +impl ImportedObjectWithoutLineContents { + fn new(name: String, line_number: usize, typechecking_only: bool) -> Self { + Self { + name, + line_number, + typechecking_only, + } + } +} + +pub fn parse_imports(path: &Path) -> GrimpResult> { + let code = fs::read_to_string(path).expect("failed to read file"); + parse_imports_from_code(&code) +} + +pub fn parse_imports_from_code(code: &str) -> GrimpResult> { + let imports_without_line_contents = parse_imports_from_code_without_line_contents(code)?; + + let lines: Vec<&str> = code.lines().collect(); + + Ok(imports_without_line_contents + .into_iter() + .map(|i| { + ImportedObject::new( + i.name, + i.line_number, + lines[i.line_number - 1].trim_start().to_string(), + i.typechecking_only, + ) + }) + .collect()) +} + +fn parse_imports_from_code_without_line_contents( + code: &str, +) -> GrimpResult> { + let line_index = LineIndex::from_source_text(code); + let source_code = SourceCode::new(code, &line_index); + + let ast = match parse_module(code) { + Ok(ast) => ast, + Err(e) => { + let line_number = source_code.line_index(e.location.start()).get(); + let text = source_code.slice(e.location); + Err(GrimpError::ParseError { + line_number, + text: text.to_owned(), + parse_error: e, + })? + } + }; + + let mut visitor = Visitor::new(source_code); + walk_body(&mut visitor, &ast.syntax().body); + + Ok(visitor.imported_objects) +} + +#[derive(Debug)] +struct Visitor<'a> { + source_code: SourceCode<'a, 'a>, + pub imported_objects: Vec, + pub typechecking_only: bool, +} + +impl<'a> Visitor<'a> { + fn new(source_code: SourceCode<'a, 'a>) -> Self { + Self { + source_code, + imported_objects: vec![], + typechecking_only: false, + } + } +} + +impl<'a> StatementVisitor<'a> for Visitor<'a> { + fn visit_stmt(&mut self, stmt: &'a Stmt) { + match stmt { + Stmt::Import(import_stmt) => { + let line_number = self.source_code.line_index(import_stmt.range.start()); + for name in import_stmt.names.iter() { + self.imported_objects + .push(ImportedObjectWithoutLineContents::new( + name.name.id.clone(), + line_number.get(), + self.typechecking_only, + )) + } + walk_stmt(self, stmt); + } + Stmt::ImportFrom(import_from_stmt) => { + let line_number = self.source_code.line_index(import_from_stmt.range.start()); + for name in import_from_stmt.names.iter() { + let imported_object_name = match import_from_stmt.module { + Some(ref module) => { + format!( + "{}{}.{}", + ".".repeat(import_from_stmt.level as usize), + &module.id, + &name.name.id + ) + } + None => { + format!( + "{}{}", + ".".repeat(import_from_stmt.level as usize), + &name.name.id + ) + } + }; + self.imported_objects + .push(ImportedObjectWithoutLineContents::new( + imported_object_name, + line_number.get(), + self.typechecking_only, + )) + } + walk_stmt(self, stmt); + } + Stmt::If(if_stmt) => match if_stmt.test.as_ref() { + Expr::Name(expr) => { + if expr.id == "TYPE_CHECKING" { + self.typechecking_only = true; + walk_stmt(self, stmt); + self.typechecking_only = false; + } else { + walk_stmt(self, stmt); + } + } + Expr::Attribute(expr) => { + if expr.attr.id == "TYPE_CHECKING" { + self.typechecking_only = true; + walk_stmt(self, stmt); + self.typechecking_only = false; + } else { + walk_stmt(self, stmt); + } + } + _ => { + walk_stmt(self, stmt); + } + }, + _ => { + walk_stmt(self, stmt); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::parse_imports_from_code; + use parameterized::parameterized; + + #[test] + fn test_parse_empty_string() { + let imports = parse_imports_from_code("").unwrap(); + assert!(imports.is_empty()); + } + + fn parse_and_check(case: (&str, &[&str])) { + let (code, expected_imports) = case; + let imports = parse_imports_from_code(code).unwrap(); + assert_eq!( + expected_imports, + imports.into_iter().map(|i| i.name).collect::>() + ); + } + + fn parse_and_check_with_typechecking_only(case: (&str, &[(&str, bool)])) { + let (code, expected_imports) = case; + let imports = parse_imports_from_code(code).unwrap(); + assert_eq!( + expected_imports + .iter() + .map(|i| (i.0.to_owned(), i.1)) + .collect::>(), + imports + .into_iter() + .map(|i| (i.name, i.typechecking_only)) + .collect::>() + ); + } + + #[parameterized(case = { + ("import foo", &["foo"]), + ("import foo_FOO_123", &["foo_FOO_123"]), + ("import foo.bar", &["foo.bar"]), + ("import foo.bar.baz", &["foo.bar.baz"]), + ("import foo, bar, bax", &["foo", "bar", "bax"]), + ("import foo as FOO", &["foo"]), + ("import foo as FOO, bar as BAR", &["foo", "bar"]), + ("import foo as FOO , bar as BAR", &["foo", "bar"]), + ("import foo # Comment", &["foo"]), + })] + fn test_parse_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import bar", &["foo.bar"]), + ("from foo import bar_BAR_123", &["foo.bar_BAR_123"]), + ("from .foo import bar", &[".foo.bar"]), + ("from ..foo import bar", &["..foo.bar"]), + ("from . import foo", &[".foo"]), + ("from .. import foo", &["..foo"]), + ("from foo.bar import baz", &["foo.bar.baz"]), + ("from .foo.bar import baz", &[".foo.bar.baz"]), + ("from ..foo.bar import baz", &["..foo.bar.baz"]), + ("from foo import bar, baz, bax", &["foo.bar", "foo.baz", "foo.bax"]), + ("from foo import bar as BAR", &["foo.bar"]), + ("from foo import bar as BAR, baz as BAZ", &["foo.bar", "foo.baz"]), + ("from foo import bar as BAR , baz as BAZ", &["foo.bar", "foo.baz"]), + ("from foo import bar # Comment", &["foo.bar"]), + })] + fn test_parse_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import (bar)", &["foo.bar"]), + ("from foo import (bar,)", &["foo.bar"]), + ("from foo import (bar, baz)", &["foo.bar", "foo.baz"]), + ("from foo import (bar, baz,)", &["foo.bar", "foo.baz"]), + ("from foo import (bar as BAR, baz as BAZ,)", &["foo.bar", "foo.baz"]), + ("from foo import ( bar as BAR , baz as BAZ , )", &["foo.bar", "foo.baz"]), + ("from foo import (bar, baz,) # Comment", &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + bar, + baz +) + "#, &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + bar, + baz, +) + "#, &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + a, b, + c, d, +) + "#, &["foo.a", "foo.b", "foo.c", "foo.d"]), + + // As name + (r#" +from foo import ( + bar as BAR, + baz as BAZ, +) + "#, &["foo.bar", "foo.baz"]), + + // Whitespace + (r#" +from foo import ( + + bar as BAR , + + baz as BAZ , + +) + "#, &["foo.bar", "foo.baz"]), + + // Comments + (r#" +from foo import ( # C + # C + bar as BAR, # C + # C + baz as BAZ, # C + # C +) # C + "#, &["foo.bar", "foo.baz"]), + })] + fn test_parse_multiline_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import *", &["foo.*"]), + ("from .foo import *", &[".foo.*"]), + ("from ..foo import *", &["..foo.*"]), + ("from . import *", &[".*"]), + ("from .. import *", &["..*"]), + ("from foo import *", &["foo.*"]), + ("from foo import * # Comment", &["foo.*"]), + })] + fn test_parse_wildcard_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("import a; import b", &["a", "b"]), + ("import a; import b;", &["a", "b"]), + ("import a ; import b ;", &["a", "b"]), + ("import a; import b # Comment", &["a", "b"]), + ("import a; from b import c; from d import (e); from f import *", &["a", "b.c", "d.e", "f.*"]), + })] + fn test_parse_import_statement_list(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import a, b, \ + c, d + "#, &["a", "b", "c", "d"]), + + (r#" +from foo import a, b, \ + c, d + "#, &["foo.a", "foo.b", "foo.c", "foo.d"]), + + (r#" +from foo \ + import * + "#, &["foo.*"]), + })] + fn test_backslash_continuation(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import a +def foo(): + import b +import c + "#, &["a", "b", "c"]), + + (r#" +import a +class Foo: + import b +import c + "#, &["a", "b", "c"]), + })] + fn test_parse_nested_imports(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import foo +if typing.TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING : import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: import bar as BAR +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo # C +if TYPE_CHECKING: import bar # C +import baz # C +"#, &[("foo", false), ("bar", true), ("baz", false)]), + })] + fn test_singleline_if_typechecking(case: (&str, &[(&str, bool)])) { + parse_and_check_with_typechecking_only(case); + } + + #[parameterized(case = { + (r#" +import foo +if typing.TYPE_CHECKING: + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo + +if TYPE_CHECKING : + + import bar + +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + import bar as BAR +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo # C +if TYPE_CHECKING: # C + # C + import bar # C + # C +import baz # C +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + """ + Comment + """ + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + })] + fn test_multiline_if_typechecking(case: (&str, &[(&str, bool)])) { + parse_and_check_with_typechecking_only(case); + } + + #[parameterized(case = { + (r#" +import foo +# import bar +import baz +"#, &["foo", "baz"]), + })] + fn test_comments(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import foo +""" +import bar +""" +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +""" +import bar +""" # foo +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +''' +import bar +''' +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +s = """ +import bar +""" +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +s = ''' +import bar +''' +import baz +"#, &["foo", "baz"]), + })] + fn test_multiline_strings(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import foo +x = 42 # """ +import bar +"#, &["foo", "bar"]), + +(r#" +import foo +print('"""') +import bar +"#, &["foo", "bar"]), + + (r#" +import foo +x = 42 # """ +import bar +x = 42 # """ +import baz +"#, &["foo", "bar", "baz"]), + })] + fn test_weird_inputs(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[test] + fn test_parse_line_numbers() { + let imports = parse_imports_from_code( + " +import a +from b import c +from d import (e) +from f import *", + ) + .unwrap(); + assert_eq!( + vec![ + ("a".to_owned(), 2), + ("b.c".to_owned(), 3), + ("d.e".to_owned(), 4), + ("f.*".to_owned(), 5), + ], + imports + .into_iter() + .map(|i| (i.name, i.line_number)) + .collect::>() + ); + } + + #[test] + fn test_parse_line_numbers_if_typechecking() { + let imports = parse_imports_from_code( + " +import a +if TYPE_CHECKING: + from b import c +from d import (e) +if TYPE_CHECKING: + from f import *", + ) + .unwrap(); + assert_eq!( + vec![ + ("a".to_owned(), 2, false), + ("b.c".to_owned(), 4, true), + ("d.e".to_owned(), 5, false), + ("f.*".to_owned(), 7, true), + ], + imports + .into_iter() + .map(|i| (i.name, i.line_number, i.typechecking_only)) + .collect::>() + ); + } +} diff --git a/rust/src/lib.rs b/rust/src/lib.rs index 4609c2e4..18327cd2 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,10 +1,11 @@ pub mod errors; pub mod exceptions; pub mod graph; +pub mod import_parsing; pub mod module_expressions; use crate::errors::{GrimpError, GrimpResult}; -use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; +use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError}; use crate::graph::higher_order_queries::Level; use crate::graph::{Graph, Module, ModuleIterator, ModuleTokenIterator}; use crate::module_expressions::ModuleExpression; @@ -20,6 +21,7 @@ use std::collections::HashSet; #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?; m.add_class::()?; m.add("ModuleNotPresent", py.get_type::())?; m.add("NoSuchContainer", py.get_type::())?; @@ -27,9 +29,32 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { "InvalidModuleExpression", py.get_type::(), )?; + m.add("ParseError", py.get_type::())?; Ok(()) } +#[pyfunction] +fn parse_imported_objects_from_code<'py>( + py: Python<'py>, + module_code: &str, +) -> PyResult>> { + let imports = import_parsing::parse_imports_from_code(module_code)?; + + Ok(imports + .into_iter() + .map(|import| { + let dict = PyDict::new(py); + dict.set_item("name", import.name).unwrap(); + dict.set_item("line_number", import.line_number).unwrap(); + dict.set_item("line_contents", import.line_contents) + .unwrap(); + dict.set_item("typechecking_only", import.typechecking_only) + .unwrap(); + dict + }) + .collect()) +} + #[pyclass(name = "Graph")] struct GraphWrapper { _graph: Graph, diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/adaptors/importscanner.py index 863ebb64..a405facc 100644 --- a/src/grimp/adaptors/importscanner.py +++ b/src/grimp/adaptors/importscanner.py @@ -1,16 +1,16 @@ from __future__ import annotations -import ast import re import logging from dataclasses import dataclass -from typing import Dict, List, Optional, Set, Union -from ast import NodeVisitor, Import, ImportFrom, If, Attribute, Name +from typing import Dict, Optional, Set from grimp import exceptions from grimp.application.ports.importscanner import AbstractImportScanner from grimp.application.ports.modulefinder import FoundPackage from grimp.domain.valueobjects import DirectImport, Module +from grimp import _rustgrimp as rust # type: ignore[attr-defined] + logger = logging.getLogger(__name__) @@ -50,10 +50,10 @@ def scan_for_imports( try: imported_objects = self._get_raw_imported_objects(module_contents) - except SyntaxError as e: + except rust.ParseError as e: raise exceptions.SourceSyntaxError( filename=module_filename, - lineno=e.lineno, + lineno=e.line_number, text=e.text, ) @@ -138,16 +138,8 @@ def _module_is_package(self, module_filename: str) -> bool: @staticmethod def _get_raw_imported_objects(module_contents: str) -> Set[_ImportedObject]: - module_lines = module_contents.splitlines() - - ast_tree = ast.parse(module_contents) - - visitor = _TreeVisitor( - module_lines=module_lines, - ) - visitor.visit(ast_tree) - - return visitor.imported_objects + imported_object_dicts = rust.parse_imported_objects_from_code(module_contents) + return {_ImportedObject(**d) for d in imported_object_dicts} @staticmethod def _get_absolute_imported_object_name( @@ -237,87 +229,3 @@ def _distill_external_module( return deepest_candidate_portion else: return module.root - - -class _TreeVisitor(NodeVisitor): - def __init__( - self, - module_lines: List[str], - ) -> None: - self.import_parser = _ImportNodeParser() - self.from_import_parser = _ImportFromNodeParser() - self.module_lines = module_lines - - self.imported_objects: Set[_ImportedObject] = set() - self.typechecking_only = False - - super().__init__() - - def visit_Import(self, node: Import) -> None: - self._parse_imported_objects_from_node(node, self.import_parser) - - def visit_ImportFrom(self, node: ImportFrom) -> None: - self._parse_imported_objects_from_node(node, self.from_import_parser) - - def visit_If(self, node: If) -> None: - if (isinstance(node.test, Name) and node.test.id == "TYPE_CHECKING") or ( - isinstance(node.test, Attribute) and node.test.attr == "TYPE_CHECKING" - ): - self.typechecking_only = True - super().generic_visit(node) - self.typechecking_only = False - else: - super().generic_visit(node) - - def _parse_imported_objects_from_node( - self, - node: Union[Import, ImportFrom], - parser: Union[_ImportNodeParser, _ImportFromNodeParser], - ) -> None: - for imported_object in parser.determine_imported_objects(node): - self.imported_objects.add( - _ImportedObject( - name=imported_object, - line_number=node.lineno, - line_contents=self.module_lines[node.lineno - 1].strip(), - typechecking_only=self.typechecking_only, - ) - ) - - -class _ImportNodeParser: - """ - Parser for statements in the form 'import x'. - """ - - node_class = ast.Import - - def determine_imported_objects(self, node: ast.AST) -> Set[str]: - imported_objects: Set[str] = set() - assert isinstance(node, self.node_class) # For type checker. - for alias in node.names: - imported_object = alias.name - imported_objects.add(imported_object) - return imported_objects - - -class _ImportFromNodeParser: - """ - Parser for statements in the form 'from x import ...'. - """ - - node_class = ast.ImportFrom - - def determine_imported_objects(self, node: ast.AST) -> Set[str]: - imported_objects: Set[str] = set() - assert isinstance(node, self.node_class) # For type checker. - assert isinstance(node.level, int) # For type checker. - - for alias in node.names: - if node.module is None: - imported_object = f"{'.' * node.level}{alias.name}" - else: - imported_object = f"{'.' * node.level}{node.module}.{alias.name}" - imported_objects.add(imported_object) - - return imported_objects diff --git a/tests/functional/test_error_handling.py b/tests/functional/test_error_handling.py index 2e0cd137..8cf1c804 100644 --- a/tests/functional/test_error_handling.py +++ b/tests/functional/test_error_handling.py @@ -15,9 +15,7 @@ def test_syntax_error_includes_module(): with pytest.raises(exceptions.SourceSyntaxError) as excinfo: build_graph("syntaxerrorpackage", cache_dir=None) - expected_exception = exceptions.SourceSyntaxError( - filename=filename, lineno=5, text="fromb . import two\n" - ) + expected_exception = exceptions.SourceSyntaxError(filename=filename, lineno=5, text="import") assert expected_exception == excinfo.value diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index f01480ee..fca4ab08 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -2,6 +2,7 @@ import pytest # type: ignore +from textwrap import dedent from grimp.adaptors.importscanner import ImportScanner, _ImportedObject from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module @@ -41,7 +42,7 @@ importer=Module("foo.one"), imported=Module("externaltwo"), line_number=3, - line_contents="import externaltwo.subpackage", + line_contents="import externaltwo.subpackage # with comment afterwards.", ), }, ), @@ -54,7 +55,7 @@ def test_absolute_imports(include_external_packages, expected_result): "/path/to/foo/one.py": """ import foo.two import externalone - import externaltwo.subpackage + import externaltwo.subpackage # with comment afterwards. arbitrary_expression = 1 """ } @@ -344,7 +345,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): importer=Module("foo.one.blue"), imported=Module("external"), line_number=6, - line_contents="from external.two import blue", + line_contents="from external.two import blue # with comment afterwards.", ), }, ), @@ -382,7 +383,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): if t.TYPE_CHECKING: from foo import three from external import one - from external.two import blue + from external.two import blue # with comment afterwards. arbitrary_expression = 1 """ }, @@ -844,64 +845,87 @@ def _modules_to_module_files(modules: Set[Module]) -> Set[ModuleFile]: return {ModuleFile(module=module, mtime=some_mtime) for module in modules} -def test_get_raw_imports(): - module_contents = """\ -import a -if TYPE_CHECKING: - import b -from c import d -from .e import f -from . import g -from .. import h -from i import * -""" - - raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) +class TestGetRawImportedObjects: + def test_get_raw_imports(self): + module_contents = dedent( + """\ + import a + if TYPE_CHECKING: + import b + from c import d + from .e import f + from . import g + from .. import h + from i import * + from ñon_ascii_变 import * + from . import ñon_ascii_变 + import ñon_ascii_变.ラーメン + """ + ) + raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) - assert raw_imported_objects == { - _ImportedObject( - name="a", - line_number=1, - line_contents="import a", - typechecking_only=False, - ), - _ImportedObject( - name="b", - line_number=3, - line_contents="import b", - typechecking_only=True, - ), - _ImportedObject( - name="c.d", - line_number=4, - line_contents="from c import d", - typechecking_only=False, - ), - _ImportedObject( - name=".e.f", - line_number=5, - line_contents="from .e import f", - typechecking_only=False, - ), - _ImportedObject( - name=".g", - line_number=6, - line_contents="from . import g", - typechecking_only=False, - ), - _ImportedObject( - name="..h", - line_number=7, - line_contents="from .. import h", - typechecking_only=False, - ), - _ImportedObject( - name="i.*", - line_number=8, - line_contents="from i import *", - typechecking_only=False, - ), - } + assert raw_imported_objects == { + _ImportedObject( + name="a", + line_number=1, + line_contents="import a", + typechecking_only=False, + ), + _ImportedObject( + name="b", + line_number=3, + line_contents="import b", + typechecking_only=True, + ), + _ImportedObject( + name="c.d", + line_number=4, + line_contents="from c import d", + typechecking_only=False, + ), + _ImportedObject( + name=".e.f", + line_number=5, + line_contents="from .e import f", + typechecking_only=False, + ), + _ImportedObject( + name=".g", + line_number=6, + line_contents="from . import g", + typechecking_only=False, + ), + _ImportedObject( + name="..h", + line_number=7, + line_contents="from .. import h", + typechecking_only=False, + ), + _ImportedObject( + name="i.*", + line_number=8, + line_contents="from i import *", + typechecking_only=False, + ), + _ImportedObject( + name="ñon_ascii_变.*", + line_number=9, + line_contents="from ñon_ascii_变 import *", + typechecking_only=False, + ), + _ImportedObject( + name=".ñon_ascii_变", + line_number=10, + line_contents="from . import ñon_ascii_变", + typechecking_only=False, + ), + _ImportedObject( + name="ñon_ascii_变.ラーメン", + line_number=11, + line_contents="import ñon_ascii_变.ラーメン", + typechecking_only=False, + ), + } @pytest.mark.parametrize(