From dde11a0d719f4d4ed10c857809e2d9c243d3b8e8 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 10:28:07 +0100 Subject: [PATCH 1/8] Copy pyimportparse into parsing.rs --- rust/Cargo.lock | 66 ++++ rust/Cargo.toml | 4 + rust/src/parsing.rs | 755 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 825 insertions(+) create mode 100644 rust/src/parsing.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 5e4650e5..f60f25f0 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -13,7 +13,11 @@ dependencies = [ "indexmap 2.7.1", "itertools", "lazy_static", + "nom 8.0.0", + "nom-regex", + "nom_locate", "parameterized", + "pyimportparse", "pyo3", "rayon", "regex", @@ -46,6 +50,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230c5f1ca6a325a32553f8640d31ac9b49f2411e901e427570154868b46da4f7" +[[package]] +name = "bytecount" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" + [[package]] name = "cfg-if" version = "1.0.0" @@ -227,6 +237,52 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "nom-regex" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e5c7731c4c1370b61604ed52a2475e861aac9e08dec9f23903d4ddfdc91c18" +dependencies = [ + "nom 7.1.3", + "regex", +] + +[[package]] +name = "nom_locate" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b577e2d69827c4740cba2b52efaad1c4cc7c73042860b199710b3575c68438d" +dependencies = [ + "bytecount", + "memchr", + "nom 8.0.0", +] + [[package]] name = "once_cell" version = "1.20.2" @@ -291,6 +347,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pyimportparse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f9c2424bb74b5fdc6d38e75c904890a8ac4d2ff52ba3da87cec4c8a412148b" +dependencies = [ + "nom 8.0.0", + "nom_locate", +] + [[package]] name = "pyo3" version = "0.24.1" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 3e6a9457..94e81cc1 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -22,6 +22,10 @@ rustc-hash = "2.1.0" indexmap = "2.7.1" regex = "1.11.1" const_format = "0.2.34" +pyimportparse = "0.2.1" +nom = "8.0.0" +nom_locate = "5.0.0" +nom-regex = "0.2.0" [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/parsing.rs b/rust/src/parsing.rs new file mode 100644 index 00000000..ee10f23c --- /dev/null +++ b/rust/src/parsing.rs @@ -0,0 +1,755 @@ +// This file was originally copied from the pyimportparse crate. +// License copied below, as per terms of the license. 
+// +// MIT License +// +// Copyright (c) 2025 Peter Byfield +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. + + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_until}; +use nom::character::complete::{ + alphanumeric1, line_ending, multispace1, not_line_ending, space0, space1, +}; +use nom::combinator::{all_consuming, opt, recognize, value, verify}; +use nom::multi::{many0, many1, separated_list1}; +use nom::sequence::{delimited, preceded, terminated}; +use nom::{IResult, Input, Parser}; +use nom_locate::{LocatedSpan, position}; + +type Span<'a> = LocatedSpan<&'a str>; + +#[derive(Debug, Eq, PartialEq, Clone)] +pub struct Import { + pub imported_object: String, + pub line_number: u32, + pub line_contents: String, + pub typechecking_only: bool, +} + +impl Import { + pub fn new( + imported_object: String, + line_number: u32, + line_contents: String, + typechecking_only: bool, + ) -> Self { + Self { + imported_object, + line_number, + line_contents, + typechecking_only, + } + } +} + +pub fn parse_imports(s: &str) -> Result<Vec<Import>, String> { + let s = Span::new(s); + let (_, result) = all_consuming(parse_block(false)) + .parse(s) + .map_err(|e| e.to_string())?; + Ok(result) +} + +fn parse_block(typechecking_only: bool) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let (s, result) = many0(alt(( + parse_if_typechecking, + value(vec![], parse_space1), + value(vec![], line_ending), + value(vec![], parse_multiline_comment), + value(vec![], parse_comment), + parse_import_statement_list(typechecking_only), + value(vec![], verify(not_line_ending, |s: &Span| !s.is_empty())), + ))) + .parse(s)?; + Ok((s, result.into_iter().flatten().collect())) + } +} + +fn parse_import_statement_list( + typechecking_only: bool, +) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let (s, imports) = separated_list1( + delimited(parse_space0, tag(";"), parse_space0), + alt(( + parse_import_statement(typechecking_only), + parse_from_import_statement(typechecking_only), + parse_multiline_from_import_statement(typechecking_only), + parse_wildcard_from_import_statement(typechecking_only), + )), + ) + .parse(s)?; + let (s, _) = (opt(parse_space0), opt(tag(";"))).parse(s)?; + Ok((s, imports.into_iter().flatten().collect())) + } +} + +fn parse_import_statement(typechecking_only: bool) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let input = s; + let (s, position) = position.parse(s)?; + let (s, _) = (tag("import"), parse_space1).parse(s)?; + let (s, imported_modules) = separated_list1(
delimited(parse_space0, tag(","), parse_space0), + terminated( + parse_module, + opt((parse_space1, tag("as"), parse_space1, parse_identifier)), + ), + ) + .parse(s)?; + + let (_, span) = input.take_split(s.location_offset() - input.location_offset()); + Ok(( + s, + imported_modules + .into_iter() + .map(|imported_module| { + Import::new( + imported_module.to_owned(), + position.location_line(), + (*span.fragment()).to_owned(), + typechecking_only, + ) + }) + .collect(), + )) + } +} + +fn parse_from_import_statement( + typechecking_only: bool, +) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let input = s; + let (s, position) = position.parse(s)?; + let (s, _) = (tag("from"), parse_space1).parse(s)?; + let (s, imported_module_base) = parse_relative_module.parse(s)?; + let (s, _) = (parse_space1, tag("import"), parse_space1).parse(s)?; + + let (s, imported_identifiers) = separated_list1( + delimited(parse_space0, tag(","), parse_space0), + terminated( + parse_identifier, + opt((parse_space1, tag("as"), parse_space1, parse_identifier)), + ), + ) + .parse(s)?; + + let (_, span) = input.take_split(s.location_offset() - input.location_offset()); + Ok(( + s, + imported_identifiers + .into_iter() + .map(|imported_identifier| { + let imported_object = if imported_module_base.ends_with(".") { + format!("{}{}", imported_module_base, imported_identifier) + } else { + format!("{}.{}", imported_module_base, imported_identifier) + }; + Import::new( + imported_object, + position.location_line(), + (*span.fragment()).to_owned(), + typechecking_only, + ) + }) + .collect(), + )) + } +} + +fn parse_multiline_from_import_statement( + typechecking_only: bool, +) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let input = s; + let (s, position) = position.parse(s)?; + let (s, _) = (tag("from"), parse_space1).parse(s)?; + let (s, imported_module_base) = parse_relative_module.parse(s)?; + let (s, _) = (parse_space1, tag("import"), parse_space1).parse(s)?; + + let (s, imported_identifiers) = delimited( + (tag("("), parse_multispace0_or_comment), + separated_list1( + delimited( + parse_multispace0_or_comment, + tag(","), + parse_multispace0_or_comment, + ), + terminated( + parse_identifier, + opt((multispace1, tag("as"), multispace1, parse_identifier)), + ), + ), + ( + parse_multispace0_or_comment, + opt(tag(",")), + parse_multispace0_or_comment, + tag(")"), + ), + ) + .parse(s)?; + + let (_, span) = input.take_split(s.location_offset() - input.location_offset()); + Ok(( + s, + imported_identifiers + .into_iter() + .map(|imported_identifier| { + let imported_object = if imported_module_base.ends_with(".") { + format!("{}{}", imported_module_base, imported_identifier) + } else { + format!("{}.{}", imported_module_base, imported_identifier) + }; + Import::new( + imported_object, + position.location_line(), + (*span.fragment()).to_owned(), + typechecking_only, + ) + }) + .collect(), + )) + } +} + +fn parse_wildcard_from_import_statement( + typechecking_only: bool, +) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { + move |s| { + let input = s; + let (s, position) = position.parse(s)?; + let (s, _) = (tag("from"), parse_space1).parse(s)?; + let (s, imported_module) = parse_relative_module.parse(s)?; + let (s, _) = (parse_space1, tag("import"), parse_space1, tag("*")).parse(s)?; + + let imported_object = if imported_module.ends_with(".") { + format!("{}*", imported_module) + } else { + format!("{}.*", imported_module) + }; + + let (_, span) = input.take_split(s.location_offset() - input.location_offset()); + Ok(( + s, + vec![Import::new(
+ imported_object, + position.location_line(), + (*span.fragment()).to_owned(), + typechecking_only, + )], + )) + } +} + +fn parse_module(s: Span) -> IResult<Span, &str> { + let (s, result) = recognize(separated_list1(tag("."), parse_identifier)).parse(s)?; + Ok((s, result.fragment())) +} + +fn parse_relative_module(s: Span) -> IResult<Span, &str> { + let (s, result) = alt(( + recognize((many0(tag(".")), parse_module)), + recognize(many1(tag("."))), + )) + .parse(s)?; + Ok((s, result.fragment())) +} + +fn parse_identifier(s: Span) -> IResult<Span, &str> { + let (s, result) = recognize(many1(alt((alphanumeric1, tag("_"))))).parse(s)?; + Ok((s, result.fragment())) +} + +fn parse_comment(s: Span) -> IResult<Span, ()> { + let (s, _) = (tag("#"), not_line_ending).parse(s)?; + Ok((s, ())) +} + +fn parse_multispace0_or_comment(s: Span) -> IResult<Span, ()> { + let (s, _) = many0(alt((value((), multispace1), parse_comment))).parse(s)?; + Ok((s, ())) +} + +fn parse_multiline_comment(s: Span) -> IResult<Span, ()> { + let (s, _) = alt(( + delimited(tag(r#"""""#), take_until(r#"""""#), tag(r#"""""#)), + delimited(tag(r#"'''"#), take_until(r#"'''"#), tag(r#"'''"#)), + )) + .parse(s)?; + Ok((s, ())) +} + +fn parse_space0(s: Span) -> IResult<Span, ()> { + let (s, _) = many0(alt((space1, tag("\\\n")))).parse(s)?; + Ok((s, ())) +} + +fn parse_space1(s: Span) -> IResult<Span, ()> { + let (s, _) = many1(alt((space1, tag("\\\n")))).parse(s)?; + Ok((s, ())) +} + +fn parse_if_typechecking(s: Span) -> IResult<Span, Vec<Import>> { + let (s, _) = ( + tag("if"), + parse_space1, + alt((tag("TYPE_CHECKING"), tag("typing.TYPE_CHECKING"))), + parse_space0, + tag(":"), + ) + .parse(s)?; + + if let Ok((s, imports)) = preceded( + parse_space0, + terminated( + parse_import_statement_list(true), + (parse_space0, opt(parse_comment)), + ), + ) + .parse(s) + { + return Ok((s, imports)); + }; + + let (s, _) = (parse_space0, opt(parse_comment), line_ending).parse(s)?; + let (s, indented_block) = parse_indented_block.parse(s)?; + let (_, imports) = all_consuming(parse_block(true)).parse(indented_block)?; + Ok((s, imports)) +} + +fn parse_indented_block(s: Span) -> IResult<Span, Span> { + let input = s; + + let (s, _) = many0((space0, line_ending)).parse(s)?; + let (s, (indentation, _, _)) = (space1, not_line_ending, opt(line_ending)).parse(s)?; + + let (s, _) = many0(alt(( + value((), (space0, line_ending)), + value( + (), + ( + tag(*indentation.fragment()), + not_line_ending, + opt(line_ending), + ), + ), + ))) + .parse(s)?; + + Ok(input.take_split(s.location_offset() - input.location_offset())) +} + +#[cfg(test)] +mod tests { + use super::parse_imports; + use parameterized::parameterized; + + #[test] + fn test_parse_empty_string() { + let imports = parse_imports("").unwrap(); + assert!(imports.is_empty()); + } + + fn parse_and_check(case: (&str, &[&str])) { + let (code, expected_imports) = case; + let imports = parse_imports(code).unwrap(); + assert_eq!( + expected_imports, + imports + .into_iter() + .map(|i| i.imported_object) + .collect::<Vec<_>>() + ); + } + + fn parse_and_check_with_typechecking_only(case: (&str, &[(&str, bool)])) { + let (code, expected_imports) = case; + let imports = parse_imports(code).unwrap(); + assert_eq!( + expected_imports + .iter() + .map(|i| (i.0.to_owned(), i.1)) + .collect::<Vec<_>>(), + imports + .into_iter() + .map(|i| (i.imported_object, i.typechecking_only)) + .collect::<Vec<_>>() + ); + } + + #[parameterized(case = { + ("import foo", &["foo"]), + ("import foo_FOO_123", &["foo_FOO_123"]), + ("import foo.bar", &["foo.bar"]), + ("import foo.bar.baz", &["foo.bar.baz"]), + ("import foo, bar, bax", &["foo", "bar", "bax"]),
("import foo as FOO", &["foo"]), + ("import foo as FOO, bar as BAR", &["foo", "bar"]), + ("import foo as FOO , bar as BAR", &["foo", "bar"]), + ("import foo # Comment", &["foo"]), + })] + fn test_parse_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import bar", &["foo.bar"]), + ("from foo import bar_BAR_123", &["foo.bar_BAR_123"]), + ("from .foo import bar", &[".foo.bar"]), + ("from ..foo import bar", &["..foo.bar"]), + ("from . import foo", &[".foo"]), + ("from .. import foo", &["..foo"]), + ("from foo.bar import baz", &["foo.bar.baz"]), + ("from .foo.bar import baz", &[".foo.bar.baz"]), + ("from ..foo.bar import baz", &["..foo.bar.baz"]), + ("from foo import bar, baz, bax", &["foo.bar", "foo.baz", "foo.bax"]), + ("from foo import bar as BAR", &["foo.bar"]), + ("from foo import bar as BAR, baz as BAZ", &["foo.bar", "foo.baz"]), + ("from foo import bar as BAR , baz as BAZ", &["foo.bar", "foo.baz"]), + ("from foo import bar # Comment", &["foo.bar"]), + })] + fn test_parse_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import (bar)", &["foo.bar"]), + ("from foo import (bar,)", &["foo.bar"]), + ("from foo import (bar, baz)", &["foo.bar", "foo.baz"]), + ("from foo import (bar, baz,)", &["foo.bar", "foo.baz"]), + ("from foo import (bar as BAR, baz as BAZ,)", &["foo.bar", "foo.baz"]), + ("from foo import ( bar as BAR , baz as BAZ , )", &["foo.bar", "foo.baz"]), + ("from foo import (bar, baz,) # Comment", &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + bar, + baz +) + "#, &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + bar, + baz, +) + "#, &["foo.bar", "foo.baz"]), + + (r#" +from foo import ( + a, b, + c, d, +) + "#, &["foo.a", "foo.b", "foo.c", "foo.d"]), + + // As name + (r#" +from foo import ( + bar as BAR, + baz as BAZ, +) + "#, &["foo.bar", "foo.baz"]), + + // Whitespace + (r#" +from foo import ( + + bar as BAR , + + baz as BAZ , + +) + "#, &["foo.bar", "foo.baz"]), + + // Comments + (r#" +from foo import ( # C + # C + bar as BAR, # C + # C + baz as BAZ, # C + # C +) # C + "#, &["foo.bar", "foo.baz"]), + })] + fn test_parse_multiline_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("from foo import *", &["foo.*"]), + ("from .foo import *", &[".foo.*"]), + ("from ..foo import *", &["..foo.*"]), + ("from . import *", &[".*"]), + ("from .. 
import *", &["..*"]), + ("from foo import *", &["foo.*"]), + ("from foo import * # Comment", &["foo.*"]), + })] + fn test_parse_wildcard_from_import_statement(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + ("import a; import b", &["a", "b"]), + ("import a; import b;", &["a", "b"]), + ("import a ; import b ;", &["a", "b"]), + ("import a; import b # Comment", &["a", "b"]), + ("import a; from b import c; from d import (e); from f import *", &["a", "b.c", "d.e", "f.*"]), + })] + fn test_parse_import_statement_list(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import a, b, \ + c, d + "#, &["a", "b", "c", "d"]), + + (r#" +from foo import a, b, \ + c, d + "#, &["foo.a", "foo.b", "foo.c", "foo.d"]), + + (r#" +from foo \ + import * + "#, &["foo.*"]), + })] + fn test_backslash_continuation(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import a +def foo(): + import b +import c + "#, &["a", "b", "c"]), + + (r#" +import a +class Foo: + import b +import c + "#, &["a", "b", "c"]), + })] + fn test_parse_nested_imports(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[parameterized(case = { + (r#" +import foo +if typing.TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING : import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: import bar as BAR +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo # C +if TYPE_CHECKING: import bar # C +import baz # C +"#, &[("foo", false), ("bar", true), ("baz", false)]), + })] + fn test_singleline_if_typechecking(case: (&str, &[(&str, bool)])) { + parse_and_check_with_typechecking_only(case); + } + + #[parameterized(case = { + (r#" +import foo +if typing.TYPE_CHECKING: + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo + +if TYPE_CHECKING : + + import bar + +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + import bar as BAR +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo # C +if TYPE_CHECKING: # C + # C + import bar # C + # C +import baz # C +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if TYPE_CHECKING: + """ + Comment + """ + import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + })] + fn test_multiline_if_typechecking(case: (&str, &[(&str, bool)])) { + parse_and_check_with_typechecking_only(case); + } + + #[parameterized(case = { + (r#" +import foo +""" +import bar +""" +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +""" +import bar +""" # foo +import baz +"#, &["foo", "baz"]), + + (r#" +import foo +''' +import bar +''' +import baz +"#, &["foo", "baz"]), + })] + fn test_multiline_strings(case: (&str, &[&str])) { + parse_and_check(case); + } + + #[test] + fn test_parse_line_numbers() { + let imports = parse_imports( + " +import a +from b import c +from d import (e) +from f import *", + ) + .unwrap(); + assert_eq!( + vec![ + ("a".to_owned(), 2_u32), + ("b.c".to_owned(), 3_u32), + ("d.e".to_owned(), 4_u32), + 
("f.*".to_owned(), 5_u32), + ], + imports + .into_iter() + .map(|i| (i.imported_object, i.line_number)) + .collect::>() + ); + } + + #[test] + fn test_parse_line_numbers_if_typechecking() { + let imports = parse_imports( + " +import a +if TYPE_CHECKING: + from b import c +from d import (e) +if TYPE_CHECKING: + from f import *", + ) + .unwrap(); + assert_eq!( + vec![ + ("a".to_owned(), 2_u32, false), + ("b.c".to_owned(), 4_u32, true), + ("d.e".to_owned(), 5_u32, false), + ("f.*".to_owned(), 7_u32, true), + ], + imports + .into_iter() + .map(|i| (i.imported_object, i.line_number, i.typechecking_only)) + .collect::>() + ); + } + + #[test] + fn test_parse_line_contents() { + let imports = parse_imports( + " +import a +from b import c +from d import (e) +from f import *", + ) + .unwrap(); + assert_eq!( + vec![ + ("a".to_owned(), "import a".to_owned()), + ("b.c".to_owned(), "from b import c".to_owned()), + ("d.e".to_owned(), "from d import (e)".to_owned()), + ("f.*".to_owned(), "from f import *".to_owned()), + ], + imports + .into_iter() + .map(|i| (i.imported_object, i.line_contents)) + .collect::>() + ); + } +} \ No newline at end of file From 598f14af10b646eb0a62e8bca4174ae95a457770 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 11:25:05 +0100 Subject: [PATCH 2/8] Add Python test that comments are included --- tests/unit/adaptors/test_importscanner.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index f01480ee..601b9108 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -41,7 +41,7 @@ importer=Module("foo.one"), imported=Module("externaltwo"), line_number=3, - line_contents="import externaltwo.subpackage", + line_contents="import externaltwo.subpackage # with comment afterwards.", ), }, ), @@ -54,7 +54,7 @@ def test_absolute_imports(include_external_packages, expected_result): "/path/to/foo/one.py": """ import foo.two import externalone - import externaltwo.subpackage + import externaltwo.subpackage # with comment afterwards. arbitrary_expression = 1 """ } @@ -344,7 +344,7 @@ def test_import_of_portion_not_in_graph(include_external_packages): importer=Module("foo.one.blue"), imported=Module("external"), line_number=6, - line_contents="from external.two import blue", + line_contents="from external.two import blue # with comment afterwards.", ), }, ), @@ -382,7 +382,7 @@ def test_absolute_from_imports(include_external_packages, expected_result): if t.TYPE_CHECKING: from foo import three from external import one - from external.two import blue + from external.two import blue # with comment afterwards. 
arbitrary_expression = 1 """ }, From eab7b15cb50a8936e0dcf7fe4c2aca6df43 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 10:28:38 +0100 Subject: [PATCH 3/8] Expose parse_to_imported_objects function --- rust/src/lib.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/rust/src/lib.rs b/rust/src/lib.rs index b4506b0d..994d92e7 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -2,6 +2,7 @@ pub mod errors; pub mod exceptions; pub mod graph; pub mod module_expressions; +mod parsing; use crate::errors::{GrimpError, GrimpResult}; use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer}; @@ -13,13 +14,15 @@ use itertools::Itertools; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{IntoPyDict, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTuple}; -use pyo3::IntoPyObjectExt; +use pyo3::{IntoPyObjectExt}; use rayon::prelude::*; use rustc_hash::FxHashSet; use std::collections::HashSet; + #[pymodule] fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(parse_to_imported_objects))?; m.add_class::<GraphWrapper>()?; m.add("ModuleNotPresent", py.get_type::<ModuleNotPresent>())?; m.add("NoSuchContainer", py.get_type::<NoSuchContainer>())?; @@ -30,6 +33,32 @@ fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> { Ok(()) } + + + +#[pyfunction] +fn parse_to_imported_objects<'py>(py: Python<'py>, python_code: &str) -> PyResult<Vec<Bound<'py, PyDict>>> { + let imports = match parsing::parse_imports(python_code) { + Ok(imports) => imports, + Err(message) => return Err(PyValueError::new_err(message)), + }; + + let imports_as_dicts = imports.into_iter().map( + |import| { + let dict = PyDict::new(py); + dict.set_item("name", import.imported_object).unwrap(); + dict.set_item("line_number", import.line_number).unwrap(); + dict.set_item("line_contents", import.line_contents).unwrap(); + dict.set_item("typechecking_only", import.typechecking_only).unwrap(); + dict + } + ).collect(); + + Ok(imports_as_dicts) +} + + + #[pyclass(name = "Graph")] struct GraphWrapper { _graph: Graph, From c80bfd11b97039d7941b97431847a7d62f635551 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 10:29:13 +0100 Subject: [PATCH 4/8] Call rust parser from Python --- src/grimp/adaptors/importscanner.py | 26 +++++++++++++--------- tests/assets/syntaxerrorpackage/foo/one.py | 4 +++- tests/assets/syntaxerrorpackage/foo/two.py | 0 tests/functional/test_error_handling.py | 23 +++++++++---------- 4 files changed, 29 insertions(+), 24 deletions(-) create mode 100644 tests/assets/syntaxerrorpackage/foo/two.py diff --git a/src/grimp/adaptors/importscanner.py b/src/grimp/adaptors/importscanner.py index 863ebb64..36317efe 100644 --- a/src/grimp/adaptors/importscanner.py +++ b/src/grimp/adaptors/importscanner.py @@ -11,6 +11,7 @@ from grimp.application.ports.importscanner import AbstractImportScanner from grimp.application.ports.modulefinder import FoundPackage from grimp.domain.valueobjects import DirectImport, Module +from grimp import _rustgrimp as rust # type: ignore[attr-defined] logger = logging.getLogger(__name__) @@ -138,16 +139,21 @@ def _module_is_package(self, module_filename: str) -> bool: @staticmethod def _get_raw_imported_objects(module_contents: str) -> Set[_ImportedObject]: -
dicts_from_rust = rust.parse_to_imported_objects(module_contents) + objects_from_rust = {_ImportedObject(**object_kwargs) for object_kwargs in dicts_from_rust} + + # TODO - remove these lines once we're confident the rust way + # is consistent with ast. + # + # module_lines = module_contents.splitlines() + # ast_tree = ast.parse(module_contents) + # visitor = _TreeVisitor(module_lines=module_lines) + # visitor.visit(ast_tree) + # objects_from_ast = visitor.imported_objects + # + # assert objects_from_rust == objects_from_ast, "Discrepancy!" + + return objects_from_rust @staticmethod def _get_absolute_imported_object_name( diff --git a/tests/assets/syntaxerrorpackage/foo/one.py b/tests/assets/syntaxerrorpackage/foo/one.py index cbf77873..5587e1b8 100644 --- a/tests/assets/syntaxerrorpackage/foo/one.py +++ b/tests/assets/syntaxerrorpackage/foo/one.py @@ -2,4 +2,6 @@ x = 1 -fromb . import two +fromb . import three + +from . import two \ No newline at end of file diff --git a/tests/assets/syntaxerrorpackage/foo/two.py b/tests/assets/syntaxerrorpackage/foo/two.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/functional/test_error_handling.py b/tests/functional/test_error_handling.py index 2e0cd137..ab816b2c 100644 --- a/tests/functional/test_error_handling.py +++ b/tests/functional/test_error_handling.py @@ -1,4 +1,3 @@ -import os import re import pytest # type: ignore @@ -6,19 +5,17 @@ from grimp import build_graph, exceptions -def test_syntax_error_includes_module(): - dirname = os.path.dirname(__file__) - filename = os.path.abspath( - os.path.join(dirname, "..", "assets", "syntaxerrorpackage", "foo", "one.py") - ) +def test_does_not_raise_exception_if_encounters_syntax_error(): + """ + We don't make any promises about what to do if there's a syntax error, + but the parser isn't a complete implementation of the Python parser, so + it may just ignore it - finding other imports, as in this case. + """ + graph = build_graph("syntaxerrorpackage", cache_dir=None) - with pytest.raises(exceptions.SourceSyntaxError) as excinfo: - build_graph("syntaxerrorpackage", cache_dir=None) - - expected_exception = exceptions.SourceSyntaxError( - filename=filename, lineno=5, text="fromb . import two\n" - ) - assert expected_exception == excinfo.value + assert graph.find_modules_directly_imported_by("syntaxerrorpackage.foo.one") == { + "syntaxerrorpackage.foo.two" + } def test_missing_root_init_file(): From 08157bc6ddc53d427d0cb6a19cd12b1aa72227a7 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 15:55:07 +0100 Subject: [PATCH 5/8] Update CHANGELOG --- CHANGELOG.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7ae982e5..cea66adb 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,13 @@ Changelog ========= +latest +------ + +* Use Rust instead of Python's built-in ast module for import parsing. + Note that `SourceSyntaxError` will no longer be raised if the parser encounters + invalid Python. + 3.8.2 (2025-04-24) ------------------ From 9d1f214685b60b6711bf335d02f827a4d275e6c6 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 13:47:10 +0100 Subject: [PATCH 6/8] Correct inconsistencies with Rust-based parsing Prior to this, the parsing behaved differently from the Python-based parsing in three ways: 1. The line contents only included up to the last matched token, meaning comments afterwards were excluded. 2. The type_checking conditional did not work for an alias of the `typing` module, e.g. 
`if t.TYPE_CHECKING`. 3. Non-ascii Python identifiers weren't supported. This commit fixes all three of those. --- rust/Cargo.lock | 10 ++++ rust/Cargo.toml | 1 + rust/src/parsing.rs | 73 ++++++++++++++++++++--- tests/unit/adaptors/test_importscanner.py | 21 +++++++ 4 files changed, 96 insertions(+), 9 deletions(-) diff --git a/rust/Cargo.lock b/rust/Cargo.lock index f60f25f0..27774e70 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -15,6 +15,7 @@ dependencies = [ "lazy_static", "nom 8.0.0", "nom-regex", + "nom-unicode", "nom_locate", "parameterized", "pyimportparse", @@ -272,6 +273,15 @@ dependencies = [ "regex", ] +[[package]] +name = "nom-unicode" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00dea06b6b61609274f105c468f1ea996d2f7226bfa528b4de79c05eabf5f759" +dependencies = [ + "nom 8.0.0", +] + [[package]] name = "nom_locate" version = "5.0.0" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 94e81cc1..aab46c79 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -26,6 +26,7 @@ pyimportparse = "0.2.1" nom = "8.0.0" nom_locate = "5.0.0" nom-regex = "0.2.0" +nom-unicode = "0.4.0" [dependencies.pyo3] version = "0.24.1" diff --git a/rust/src/parsing.rs b/rust/src/parsing.rs index ee10f23c..b9500126 100644 --- a/rust/src/parsing.rs +++ b/rust/src/parsing.rs @@ -27,8 +27,9 @@ use nom::branch::alt; use nom::bytes::complete::{tag, take_until}; use nom::character::complete::{ - alphanumeric1, line_ending, multispace1, not_line_ending, space0, space1, + line_ending, multispace1, not_line_ending, space0, space1, }; +use nom_unicode::complete::alphanumeric1; use nom::combinator::{all_consuming, opt, recognize, value, verify}; use nom::multi::{many0, many1, separated_list1}; use nom::sequence::{delimited, preceded, terminated}; @@ -61,12 +62,28 @@ impl Import { } } -pub fn parse_imports(s: &str) -> Result<Vec<Import>, String> { - let s = Span::new(s); - let (_, result) = all_consuming(parse_block(false)) - .parse(s) +pub fn parse_imports(python_file_contents: &str) -> Result<Vec<Import>, String> { + let span = Span::new(python_file_contents); + let (_, imports) = all_consuming(parse_block(false)) + .parse(span) .map_err(|e| e.to_string())?; - Ok(result) + Ok(with_corrected_line_contents(python_file_contents, imports)) +} + +// Return the imports, but with the full line contents. +// Currently our nom parsing only pulls out to the latest token, so we correct it here +// by finding the corresponding whole line, based on the line number. +// TODO: adjust the parsing code so it figures it out correctly in the first place. +fn with_corrected_line_contents(python_file_contents: &str, imports: Vec<Import>) -> Vec<Import> { + let lines: Vec<&str> = python_file_contents.lines().collect(); + imports.into_iter().map( + |import| Import { + imported_object: import.imported_object, + line_number: import.line_number, + line_contents: lines[(import.line_number as usize) - 1].trim_start().to_string(), + typechecking_only: import.typechecking_only, + } + ).collect() } fn parse_block(typechecking_only: bool) -> impl Fn(Span) -> IResult<Span, Vec<Import>> { @@ -276,8 +293,18 @@ fn parse_relative_module(s: Span) -> IResult<Span, &str> { Ok((s, result.fragment())) } + +// Parse a valid Python identifier. +// +// Note this is not implemented as thoroughly as in the Python spec. +// Some identifiers will be valid here (e.g. ones that begin with digits) +// that aren't actually valid in Python. Unicode identifiers are supported.
+// +// See https://docs.python.org/3/reference/lexical_analysis.html#identifiers fn parse_identifier(s: Span) -> IResult<Span, &str> { - let (s, result) = recognize(many1(alt((alphanumeric1, tag("_"))))).parse(s)?; + let (s, result) = recognize( + many1(alt((alphanumeric1, tag("_")))) + ).parse(s)?; Ok((s, result.fragment())) } @@ -310,11 +337,17 @@ fn parse_space1(s: Span) -> IResult<Span, ()> { Ok((s, ())) } + fn parse_if_typechecking(s: Span) -> IResult<Span, Vec<Import>> { let (s, _) = ( tag("if"), parse_space1, - alt((tag("TYPE_CHECKING"), tag("typing.TYPE_CHECKING"))), + alt( + ( + tag("TYPE_CHECKING"), + preceded(parse_identifier, tag(".TYPE_CHECKING")), + ), + ), parse_space0, tag(":"), ) @@ -575,6 +608,18 @@ import baz (r#" import foo +if t.TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo +if some_WE1RD_alias.TYPE_CHECKING: import bar +import baz +"#, &[("foo", false), ("bar", true), ("baz", false)]), + + (r#" +import foo if TYPE_CHECKING : import bar import baz "#, &[("foo", false), ("bar", true), ("baz", false)]), @@ -734,17 +779,27 @@ if TYPE_CHECKING: let imports = parse_imports( " import a +import a.b # Comment afterwards. from b import c from d import (e) -from f import *", +from f import * +from something.foo import * # Comment afterwards. +if True: + from indented import foo +from ñon_ascii_变 import ラーメン +", ) .unwrap(); assert_eq!( vec![ ("a".to_owned(), "import a".to_owned()), + ("a.b".to_owned(), "import a.b # Comment afterwards.".to_owned()), ("b.c".to_owned(), "from b import c".to_owned()), ("d.e".to_owned(), "from d import (e)".to_owned()), ("f.*".to_owned(), "from f import *".to_owned()), + ("something.foo.*".to_owned(), "from something.foo import * # Comment afterwards.".to_owned()), + ("indented.foo".to_owned(), "from indented import foo".to_owned()), + ("ñon_ascii_变.ラーメン".to_owned(), "from ñon_ascii_变 import ラーメン".to_owned()), ], imports .into_iter() diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index 601b9108..5fe19d7f 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -854,6 +854,9 @@ def test_get_raw_imports(): from . import g from .. import h from i import * +from ñon_ascii_变 import * +from . import ñon_ascii_变 +import ñon_ascii_变.ラーメン """ raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) @@ -901,6 +904,24 @@ def test_get_raw_imports(): line_contents="from i import *", typechecking_only=False, ), + _ImportedObject( + name="ñon_ascii_变.*", + line_number=9, + line_contents="from ñon_ascii_变 import *", + typechecking_only=False, + ), + _ImportedObject( + name=".ñon_ascii_变", + line_number=10, + line_contents="from . 
import ñon_ascii_变", + typechecking_only=False, + ), + _ImportedObject( + name="ñon_ascii_变.ラーメン", + line_number=11, + line_contents="import ñon_ascii_变.ラーメン", + typechecking_only=False, + ), } From c1341c4d4b389e76a32c80cda575400c5455466a Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 15:48:39 +0100 Subject: [PATCH 7/8] Turn test_get_raw_imports into a class --- tests/unit/adaptors/test_importscanner.py | 161 +++++++++++----------- 1 file changed, 82 insertions(+), 79 deletions(-) diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index 5fe19d7f..fca4ab08 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -2,6 +2,7 @@ import pytest # type: ignore +from textwrap import dedent from grimp.adaptors.importscanner import ImportScanner, _ImportedObject from grimp.application.ports.modulefinder import FoundPackage, ModuleFile from grimp.domain.valueobjects import DirectImport, Module @@ -844,85 +845,87 @@ def _modules_to_module_files(modules: Set[Module]) -> Set[ModuleFile]: return {ModuleFile(module=module, mtime=some_mtime) for module in modules} -def test_get_raw_imports(): - module_contents = """\ -import a -if TYPE_CHECKING: - import b -from c import d -from .e import f -from . import g -from .. import h -from i import * -from ñon_ascii_变 import * -from . import ñon_ascii_变 -import ñon_ascii_变.ラーメン -""" - - raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) - - assert raw_imported_objects == { - _ImportedObject( - name="a", - line_number=1, - line_contents="import a", - typechecking_only=False, - ), - _ImportedObject( - name="b", - line_number=3, - line_contents="import b", - typechecking_only=True, - ), - _ImportedObject( - name="c.d", - line_number=4, - line_contents="from c import d", - typechecking_only=False, - ), - _ImportedObject( - name=".e.f", - line_number=5, - line_contents="from .e import f", - typechecking_only=False, - ), - _ImportedObject( - name=".g", - line_number=6, - line_contents="from . import g", - typechecking_only=False, - ), - _ImportedObject( - name="..h", - line_number=7, - line_contents="from .. import h", - typechecking_only=False, - ), - _ImportedObject( - name="i.*", - line_number=8, - line_contents="from i import *", - typechecking_only=False, - ), - _ImportedObject( - name="ñon_ascii_变.*", - line_number=9, - line_contents="from ñon_ascii_变 import *", - typechecking_only=False, - ), - _ImportedObject( - name=".ñon_ascii_变", - line_number=10, - line_contents="from . import ñon_ascii_变", - typechecking_only=False, - ), - _ImportedObject( - name="ñon_ascii_变.ラーメン", - line_number=11, - line_contents="import ñon_ascii_变.ラーメン", - typechecking_only=False, - ), - } +class TestGetRawImportedObjects: + def test_get_raw_imports(self): + module_contents = dedent( + """\ + import a + if TYPE_CHECKING: + import b + from c import d + from .e import f + from . import g + from .. import h + from i import * + from ñon_ascii_变 import * + from . 
import ñon_ascii_变 + import ñon_ascii_变.ラーメン + """ + ) + raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) + + assert raw_imported_objects == { + _ImportedObject( + name="a", + line_number=1, + line_contents="import a", + typechecking_only=False, + ), + _ImportedObject( + name="b", + line_number=3, + line_contents="import b", + typechecking_only=True, + ), + _ImportedObject( + name="c.d", + line_number=4, + line_contents="from c import d", + typechecking_only=False, + ), + _ImportedObject( + name=".e.f", + line_number=5, + line_contents="from .e import f", + typechecking_only=False, + ), + _ImportedObject( + name=".g", + line_number=6, + line_contents="from . import g", + typechecking_only=False, + ), + _ImportedObject( + name="..h", + line_number=7, + line_contents="from .. import h", + typechecking_only=False, + ), + _ImportedObject( + name="i.*", + line_number=8, + line_contents="from i import *", + typechecking_only=False, + ), + _ImportedObject( + name="ñon_ascii_变.*", + line_number=9, + line_contents="from ñon_ascii_变 import *", + typechecking_only=False, + ), + _ImportedObject( + name=".ñon_ascii_变", + line_number=10, + line_contents="from . import ñon_ascii_变", + typechecking_only=False, + ), + _ImportedObject( + name="ñon_ascii_变.ラーメン", + line_number=11, + line_contents="import ñon_ascii_变.ラーメン", + typechecking_only=False, + ), + } @pytest.mark.parametrize( From a39e4045df717443132137c0ab630f940c9e0570 Mon Sep 17 00:00:00 2001 From: David Seddon Date: Fri, 2 May 2025 16:50:29 +0100 Subject: [PATCH 8/8] Add failing tests for multiline comments --- tests/unit/adaptors/test_importscanner.py | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/unit/adaptors/test_importscanner.py b/tests/unit/adaptors/test_importscanner.py index fca4ab08..a0db2017 100644 --- a/tests/unit/adaptors/test_importscanner.py +++ b/tests/unit/adaptors/test_importscanner.py @@ -927,6 +927,67 @@ def test_get_raw_imports(self): ), } + @pytest.mark.xfail(strict=True) + def test_multiline_comment_a(self): + module_contents = dedent( + """\ + import a + + FOO = \"\"\" + ... + \"\"\" + + \"\"\"import b + import c + \"\"\" + + import d + """ + ) + raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) + + assert raw_imported_objects == { + _ImportedObject( + name="a", + line_number=1, + line_contents="import a", + typechecking_only=False, + ), + _ImportedObject( + name="d", + line_number=11, + line_contents="import d", + typechecking_only=False, + ), + } + + @pytest.mark.xfail(strict=True) + def test_multiline_comment_b(self): + module_contents = dedent( + """\ + r\"\"\" + A raw multiline string. + \"\"\" + + import a + + \"\"\" + Another multiline string. + \"\"\" + """ + ) + + raw_imported_objects = ImportScanner._get_raw_imported_objects(module_contents) + + assert raw_imported_objects == { + _ImportedObject( + name="a", + line_number=5, + line_contents="import a", + typechecking_only=False, + ), + } + @pytest.mark.parametrize( "is_package,imported_object_name,expected_absolute_imported_object_name",