From b90f62e7d1f46b57037c0bbb7ca71f3d6c2e286f Mon Sep 17 00:00:00 2001 From: danceratopz Date: Thu, 22 Jan 2026 17:46:30 +0100 Subject: [PATCH 01/14] Fix glob patterns in template packaging - Use explicit patterns instead of recursive ** globs. - Setuptools doesn't reliably include files with ** in sdist builds. --- setup.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 409974a..65a4fc9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,17 +32,17 @@ docc = py.typed docc.plugins.html = - templates/** + templates/*.html static/docc.css static/search.js static/chota/dist/chota.min.css static/fuse/dist/fuse.min.js docc.plugins.listing = - templates/** + templates/*.html docc.plugins.python = - templates/** + templates/html/*.html [options.entry_points] console_scripts = From 0d59b5cdc38032866f04a104ab0b968a614ab42c Mon Sep 17 00:00:00 2001 From: danceratopz Date: Thu, 22 Jan 2026 17:46:59 +0100 Subject: [PATCH 02/14] Add test extras with pytest-cov dependency - Add dedicated test extras with pytest and pytest-cov. --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 65a4fc9..17d2bfd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -110,6 +110,9 @@ lint = flake8-bugbear>=25.10.21,<26.0.0 flake8>=7.3,<8 pytest>=8.4.2,<9 +test = + pytest>=8.4.2,<9 + pytest-cov>=6.0,<7 [flake8] dictionaries=en_US,python,technical From f748bc6c8332546fdbe5a35467834f064b36bf44 Mon Sep 17 00:00:00 2001 From: danceratopz Date: Tue, 24 Feb 2026 11:18:33 +0100 Subject: [PATCH 03/14] Include test extras in tox environments - Add test extras to both test and type environments so pytest and pytest-cov are available alongside lint deps. 
--- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 612d4d0..3fd5318 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,7 @@ python = description = run tests extras = lint + test commands = python --version isort src tests setup.py --check --diff @@ -25,6 +26,7 @@ description = check type annotations platform = (linux|darwin) extras = lint + test commands = python --version pyre --noninteractive check From 2373f682df78c18774ae141eb604d9576a3cba91 Mon Sep 17 00:00:00 2001 From: danceratopz Date: Thu, 22 Jan 2026 17:47:41 +0100 Subject: [PATCH 04/14] Add pytest and coverage configuration - Configure pytest to run with coverage enabled. - Set minimum coverage threshold to 80%. - Exclude common non-testable patterns from coverage. --- pyproject.toml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index c491fcd..d211eb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,3 +15,20 @@ paths = [ "src" ] [tool.docc.output] path = "docs" + +[tool.pytest.ini_options] +# term-missing shows uncovered line numbers in terminal output +addopts = "--cov=docc --cov-report=term-missing" +testpaths = ["tests"] + +[tool.coverage.run] +source = ["src/docc"] +branch = true + +[tool.coverage.report] +fail_under = 80 +exclude_lines = [ + "pragma: no cover", + "if TYPE_CHECKING:", + "raise NotImplementedError", +] From 6f0caf1cc6633e3f14b388c210c975ffadb6549f Mon Sep 17 00:00:00 2001 From: danceratopz Date: Tue, 24 Feb 2026 10:22:49 +0100 Subject: [PATCH 05/14] Add development section to README --- README.md | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/README.md b/README.md index 6a006c0..dfee7b2 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,73 @@ Finally, to generate the documentation: docc ``` +## Development + +### Setting Up + +Clone the repository with submodules: + +```bash +git clone --recurse-submodules 
https://github.com/SamWilsn/docc.git +cd docc +``` + +If you already cloned without submodules: + +```bash +git submodule update --init --recursive +``` + +Install in development mode to run tests and lint: + +```bash +pip install -e ".[test,lint]" +``` + +### Code Style + +This project uses: + +- **black** for code formatting (line length: 79). +- **isort** for import sorting (black profile). +- **flake8** for linting. +- **pyre** for type checking. + +Format code before committing: + +```bash +black src tests +isort src tests +``` + +### Running Tests + +```bash +pytest +``` + +Tests require 80% code coverage to pass. For a detailed coverage report: + +```bash +pytest --cov-report=html +``` + +The HTML report will be generated in `htmlcov/`. + +### Using Tox + +Run the full test suite with linting: + +```bash +tox +``` + +Run only type checking: + +```bash +tox -e type +``` + [docs-badge]: https://github.com/SamWilsn/docc/actions/workflows/gh-pages.yaml/badge.svg?branch=master [docs]: https://samwilsn.github.io/docc/ [`pyproject.toml`]: https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata From 17b906182e269dd4af9c42146dcb91bf9050870a Mon Sep 17 00:00:00 2001 From: danceratopz Date: Thu, 22 Jan 2026 17:49:11 +0100 Subject: [PATCH 06/14] Add comprehensive test suite - Add tests for CLI, settings, context, and document modules. - Add tests for plugin loader and transform pipeline. - Add tests for HTML, mistletoe, and verbatim plugins. - Add end-to-end HTML rendering pipeline tests. - Add tests for Python CST parsing and node types. - Add tests for references, search, and resources plugins. - Add integration tests for end-to-end workflows. - Add behavior-level pipeline contract tests. - Achieve 90% code coverage. 
--- tests/conftest.py | 22 +- tests/test_build_discover.py | 173 +++++ tests/test_cli.py | 399 ++++++++++ tests/test_context.py | 152 ++++ tests/test_debug.py | 115 +++ tests/test_document.py | 518 +++++++++++++ tests/test_files.py | 226 ++++++ tests/test_html.py | 679 +++++++++++++++++ tests/test_html_comprehensive.py | 360 +++++++++ tests/test_html_e2e.py | 611 +++++++++++++++ tests/test_html_extended.py | 249 ++++++ tests/test_integration.py | 1001 +++++++++++++++++++++++++ tests/test_listing.py | 342 +++++++++ tests/test_loader.py | 123 +++ tests/test_mistletoe_comprehensive.py | 831 ++++++++++++++++++++ tests/test_mistletoe_extended.py | 269 +++++++ tests/test_python_cst.py | 505 +++++++++++++ tests/test_references.py | 386 ++++++++++ tests/test_resources.py | 155 ++++ tests/test_search.py | 455 +++++++++++ tests/test_settings.py | 307 ++++++++ tests/test_source.py | 184 +++++ tests/test_transform.py | 101 +++ tests/test_verbatim.py | 570 ++++++++++++++ whitelist.txt | 47 +- 25 files changed, 8775 insertions(+), 5 deletions(-) create mode 100644 tests/test_build_discover.py create mode 100644 tests/test_cli.py create mode 100644 tests/test_context.py create mode 100644 tests/test_debug.py create mode 100644 tests/test_document.py create mode 100644 tests/test_files.py create mode 100644 tests/test_html.py create mode 100644 tests/test_html_comprehensive.py create mode 100644 tests/test_html_e2e.py create mode 100644 tests/test_html_extended.py create mode 100644 tests/test_integration.py create mode 100644 tests/test_listing.py create mode 100644 tests/test_loader.py create mode 100644 tests/test_mistletoe_comprehensive.py create mode 100644 tests/test_mistletoe_extended.py create mode 100644 tests/test_python_cst.py create mode 100644 tests/test_references.py create mode 100644 tests/test_resources.py create mode 100644 tests/test_search.py create mode 100644 tests/test_settings.py create mode 100644 tests/test_source.py create mode 100644 
tests/test_transform.py create mode 100644 tests/test_verbatim.py diff --git a/tests/conftest.py b/tests/conftest.py index 6dc80ff..0e25dde 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,6 +21,7 @@ from docc.context import Context from docc.document import Document, Node, Visit, Visitor +from docc.plugins.references import Reference from docc.settings import PluginSettings, Settings @@ -55,7 +56,7 @@ def _assert_in( haystack = haystack.root visitor = ContainsVisitor(matcher) haystack.visit(visitor) - assert visitor.found + assert visitor.found, f"No matching node found in tree: {haystack!r}" def _assert_not_in( @@ -105,3 +106,22 @@ def _make_context(root: Node) -> Context: @pytest.fixture def make_context() -> Callable[[Node], Context]: return _make_context + + +class ReferenceChecker(Visitor): + """Helper visitor to check for Reference nodes in a tree.""" + + def __init__(self) -> None: + self.found = False + self.count = 0 + + @override + def enter(self, node: Node) -> Visit: + if isinstance(node, Reference): + self.found = True + self.count += 1 + return Visit.TraverseChildren + + @override + def exit(self, node: Node) -> None: + pass diff --git a/tests/test_build_discover.py b/tests/test_build_discover.py new file mode 100644 index 0000000..8da79b0 --- /dev/null +++ b/tests/test_build_discover.py @@ -0,0 +1,173 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . + +import tempfile +from pathlib import Path, PurePath +from typing import Dict, FrozenSet, Iterator, Optional, Set + +import pytest + +from docc.build import Builder +from docc.build import load as load_builders +from docc.discover import Discover +from docc.discover import load as load_discovers +from docc.document import BlankNode, Document +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +class MockSource(Source): + _path: PurePath + + def __init__(self, path: Optional[PurePath] = None) -> None: + self._path = path if path is not None else PurePath("mock.py") + + @property + def relative_path(self) -> Optional[PurePath]: + return self._path + + @property + def output_path(self) -> PurePath: + return self._path + + +class ConcreteDiscover(Discover): + def __init__(self, config: PluginSettings) -> None: + self.config = config + + def discover(self, known: FrozenSet[Source]) -> Iterator[Source]: + yield MockSource() + + +class ConcreteBuilder(Builder): + def __init__(self, config: PluginSettings) -> None: + self.config = config + + def build( + self, + unprocessed: Set[Source], + processed: Dict[Source, Document], + ) -> None: + for source in list(unprocessed): + if isinstance(source, MockSource): + unprocessed.remove(source) + processed[source] = Document(BlankNode()) + + +class TestDiscover: + def test_concrete_discover(self, temp_dir: Path) -> None: + settings = Settings(temp_dir, {"tool": {"docc": {}}}) + plugin_settings = settings.for_plugin("test") + + discover = ConcreteDiscover(plugin_settings) + sources = list(discover.discover(frozenset())) + + assert len(sources) == 1 + assert isinstance(sources[0], MockSource) + + +class TestBuilder: + def test_concrete_builder(self, temp_dir: Path) -> None: + settings = Settings(temp_dir, {"tool": {"docc": {}}}) + plugin_settings = 
settings.for_plugin("test") + + builder = ConcreteBuilder(plugin_settings) + unprocessed: Set[Source] = {MockSource()} + processed: Dict[Source, Document] = {} + + builder.build(unprocessed, processed) + + assert len(unprocessed) == 0 + assert len(processed) == 1 + + +class TestBuilderContextManager: + def test_builder_with_statement(self, temp_dir: Path) -> None: + """ + Builder extends AbstractContextManager, so it must support + the with statement (enter/exit protocol). + """ + settings = Settings(temp_dir, {"tool": {"docc": {}}}) + plugin_settings = settings.for_plugin("test") + + builder = ConcreteBuilder(plugin_settings) + with builder as b: + assert b is builder + + +class TestLoadDiscovers: + def test_load_empty_discovery_list(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + {"tool": {"docc": {"discovery": []}}}, + ) + + result = list(load_discovers(settings)) + assert result == [] + + def test_load_single_discover(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "discovery": ["docc.python.discover"], + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]} + }, + } + } + }, + ) + + result = list(load_discovers(settings)) + assert len(result) == 1 + assert result[0][0] == "docc.python.discover" + assert isinstance(result[0][1], Discover) + + +class TestLoadBuilders: + def test_load_empty_builder_list(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + {"tool": {"docc": {"build": []}}}, + ) + + result = list(load_builders(settings)) + assert result == [] + + def test_load_single_builder(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "build": ["docc.python.build"], + "plugins": {"docc.python.build": {}}, + } + } + }, + ) + + result = list(load_builders(settings)) + assert len(result) == 1 + assert result[0][0] == "docc.python.build" + assert isinstance(result[0][1], Builder) diff --git a/tests/test_cli.py 
b/tests/test_cli.py new file mode 100644 index 0000000..4a2b80c --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,399 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import logging +import os +import tempfile +from io import StringIO, TextIOBase +from pathlib import Path +from typing import Iterator, Tuple + +import pytest + +from docc.cli import _OutputVisitor, main +from docc.context import Context +from docc.document import ( + BlankNode, + ListNode, + Node, + OutputNode, + Visit, +) + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +class MockOutputNode(OutputNode): + def __init__( + self, content: str = "output content", ext: str = ".html" + ) -> None: + self._content = content + self._ext = ext + + @property + def children(self) -> Tuple[()]: + return () + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return self._ext + + def output(self, context: Context, destination: TextIOBase) -> None: + destination.write(self._content) + + +class TestOutputVisitor: + def test_enter_output_node_calls_output(self) -> None: + output_node = MockOutputNode("test output") + context = Context({}) + destination = StringIO() + + visitor = _OutputVisitor(context, destination) + result = visitor.enter(output_node) + + assert result == 
Visit.SkipChildren + assert destination.getvalue() == "test output" + + def test_enter_non_output_node_traverses(self) -> None: + node = BlankNode() + context = Context({}) + destination = StringIO() + + visitor = _OutputVisitor(context, destination) + result = visitor.enter(node) + + assert result == Visit.TraverseChildren + + def test_enter_list_node_traverses(self) -> None: + node = ListNode([BlankNode()]) + context = Context({}) + destination = StringIO() + + visitor = _OutputVisitor(context, destination) + result = visitor.enter(node) + + assert result == Visit.TraverseChildren + + def test_exit_does_nothing(self) -> None: + node = BlankNode() + context = Context({}) + destination = StringIO() + + visitor = _OutputVisitor(context, destination) + result = visitor.exit(node) + + # exit() should complete without modifying destination + assert result is None, "exit() should return None" + assert ( + destination.getvalue() == "" + ), "exit() should not write to destination" + + +class TestMainFunction: + def test_main_requires_output_path(self, temp_dir: Path) -> None: + pyproject = temp_dir / "pyproject.toml" + pyproject.write_text("[tool.docc]\n") + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + with pytest.raises(SystemExit) as exc_info: + main([]) + assert exc_info.value.code == 1 + finally: + os.chdir(original_cwd) + + def test_main_with_output_flag(self, temp_dir: Path) -> None: + # Create a minimal Python file to discover and process + src_dir = temp_dir / "src" + src_dir.mkdir() + py_file = src_dir / "example.py" + py_file.write_text('"""Module docstring."""\n') + + pyproject = temp_dir / "pyproject.toml" + pyproject.write_text( + f""" +[tool.docc] +discovery = ["docc.python.discover"] +build = ["docc.python.build"] +transform = ["docc.python.transform", "docc.html.transform"] +context = ["docc.html.context"] + +[tool.docc.plugins."docc.python.discover"] +paths = ["{src_dir}"] + +[tool.docc.output] +path = "docs" +""" + ) + + output_dir = 
temp_dir / "output" + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + main(["--output", str(output_dir)]) + finally: + os.chdir(original_cwd) + + assert output_dir.exists(), "Output directory should be created" + + def test_main_uses_settings_output_path(self, temp_dir: Path) -> None: + # Create a minimal Python file to discover and process + src_dir = temp_dir / "src" + src_dir.mkdir() + py_file = src_dir / "example.py" + py_file.write_text('"""Module docstring."""\n') + + output_dir = temp_dir / "docs" + pyproject = temp_dir / "pyproject.toml" + pyproject.write_text( + f""" +[tool.docc] +discovery = ["docc.python.discover"] +build = ["docc.python.build"] +transform = ["docc.python.transform", "docc.html.transform"] +context = ["docc.html.context"] + +[tool.docc.plugins."docc.python.discover"] +paths = ["{src_dir}"] + +[tool.docc.output] +path = "{output_dir}" +""" + ) + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + main([]) + finally: + os.chdir(original_cwd) + + assert ( + output_dir.exists() + ), "Output directory should be created from settings" + + +class TestOutputVisitorWithNestedNodes: + def test_nested_output_nodes(self) -> None: + inner = MockOutputNode("inner") + outer_content = ListNode([inner]) + + class ContainerNode(OutputNode): + @property + def children(self): + return (outer_content,) + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return ".html" + + def output( + self, context: Context, destination: TextIOBase + ) -> None: + destination.write("outer") + + container = ContainerNode() + context = Context({}) + destination = StringIO() + + visitor = _OutputVisitor(context, destination) + container.visit(visitor) + + assert destination.getvalue() == "outer" + + def test_multiple_output_nodes(self) -> None: + first_node = MockOutputNode("first") + second_node = MockOutputNode("second") + + context = Context({}) + destination = StringIO() + + visitor = 
_OutputVisitor(context, destination) + + visitor.enter(first_node) + visitor.exit(first_node) + + visitor.enter(second_node) + visitor.exit(second_node) + + assert destination.getvalue() == "firstsecond" + + +class TestMainWithPythonFiles: + def test_processes_python_source(self, temp_dir: Path) -> None: + src_dir = temp_dir / "src" + src_dir.mkdir() + + py_file = src_dir / "example.py" + py_file.write_text( + '"""Module docstring."""\n\ndef hello():\n pass\n' + ) + + pyproject = temp_dir / "pyproject.toml" + pyproject.write_text( + f""" +[tool.docc] +discovery = ["docc.python.discover"] +build = ["docc.python.build"] +transform = [ + "docc.python.transform", + "docc.verbatim.transform", + "docc.references.index", + "docc.html.transform", +] +context = ["docc.html.context", "docc.references.context"] + +[tool.docc.plugins."docc.python.discover"] +paths = ["{src_dir}"] + +[tool.docc.output] +path = "docs" +""" + ) + + output_dir = temp_dir / "docs" + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + main(["--output", str(output_dir)]) + finally: + os.chdir(original_cwd) + + assert output_dir.exists(), "Output directory should be created" + + +class TestCliWithMinimalConfig: + def test_empty_project(self, temp_dir: Path) -> None: + pyproject = temp_dir / "pyproject.toml" + pyproject.write_text( + """ +[tool.docc] +discovery = [] +build = [] +transform = [] +context = [] + +[tool.docc.output] +path = "docs" +""" + ) + + output_dir = temp_dir / "docs" + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + # main() should complete without raising exceptions + main(["--output", str(output_dir)]) + finally: + os.chdir(original_cwd) + + # With no sources, the pipeline completes without producing output. + # This verifies empty configurations don't cause errors. 
+ assert ( + not output_dir.exists() + ), "No output should be created for empty project" + + +class TestCliDuplicateContextProviders: + def test_duplicate_context_type_raises(self, temp_dir: Path) -> None: + """ + When two context plugins provide the same type, main() raises + an Exception about the conflict (cli.py:97-103). + """ + pyproject = temp_dir / "pyproject.toml" + # Use the same context plugin twice so provides() returns the + # same type for both entries. + pyproject.write_text( + """ +[tool.docc] +discovery = [] +build = [] +transform = [] +context = ["docc.references.context", "docc.references.context"] + +[tool.docc.output] +path = "docs" +""" + ) + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + with pytest.raises(Exception, match="conflicts with"): + main(["--output", str(temp_dir / "docs")]) + finally: + os.chdir(original_cwd) + + +class TestCliDocumentNoExtension: + def test_document_without_extension_skipped( + self, temp_dir: Path, caplog: pytest.LogCaptureFixture + ) -> None: + """ + When a document has no extension (no OutputNode), the write + phase logs an error and skips it (cli.py:164-169). + """ + pyproject = temp_dir / "pyproject.toml" + # Use python discover and build, but do NOT include any + # transform that adds an OutputNode, so extension() returns None. 
+ src_dir = temp_dir / "src" + src_dir.mkdir() + py_file = src_dir / "example.py" + py_file.write_text('"""Module docstring."""\n') + + pyproject.write_text( + f""" +[tool.docc] +discovery = ["docc.python.discover"] +build = ["docc.python.build"] +transform = [] +context = [] + +[tool.docc.plugins."docc.python.discover"] +paths = ["{src_dir}"] + +[tool.docc.output] +path = "docs" +""" + ) + + output_dir = temp_dir / "docs" + + original_cwd = os.getcwd() + try: + os.chdir(temp_dir) + with caplog.at_level(logging.ERROR): + main(["--output", str(output_dir)]) + finally: + os.chdir(original_cwd) + + assert any( + "does not specify a file extension" in r.message + for r in caplog.records + ) diff --git a/tests/test_context.py b/tests/test_context.py new file mode 100644 index 0000000..532a880 --- /dev/null +++ b/tests/test_context.py @@ -0,0 +1,152 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import tempfile +from pathlib import Path + +import pytest + +from docc.context import Context, load +from docc.settings import Settings + + +class TestContext: + def test_init_empty(self) -> None: + ctx = Context() + assert str not in ctx + + def test_init_with_items(self) -> None: + ctx = Context({str: "hello", int: 42}) + assert ctx[str] == "hello" + assert ctx[int] == 42 + + def test_init_with_none(self) -> None: + ctx = Context(None) + assert str not in ctx + with pytest.raises(KeyError): + ctx[str] + + def test_getitem_returns_value(self) -> None: + ctx = Context({str: "test"}) + assert ctx[str] == "test" + + def test_getitem_missing_raises(self) -> None: + ctx = Context({}) + with pytest.raises(KeyError): + ctx[str] + + def test_contains_true(self) -> None: + ctx = Context({str: "hello"}) + assert str in ctx + + def test_contains_false(self) -> None: + ctx = Context({}) + assert str not in ctx + + def test_init_validates_types(self) -> None: + with pytest.raises(ValueError, match="is not an instance"): + Context({str: 123}) + + def test_repr(self) -> None: + ctx = Context({str: "hello"}) + result = repr(ctx) + assert "Context" in result + assert "str" in result + + def test_multiple_types(self) -> None: + class CustomA: + pass + + class CustomB: + pass + + a = CustomA() + b = CustomB() + + ctx = Context({CustomA: a, CustomB: b}) + assert ctx[CustomA] is a + assert ctx[CustomB] is b + + def test_subclass_types(self) -> None: + class Base: + pass + + class Derived(Base): + pass + + d = Derived() + ctx = Context({Derived: d}) + assert ctx[Derived] is d + + def test_derived_stored_base_lookup_not_found(self) -> None: + """ + When a Derived instance is stored under its Derived key, + looking up by Base raises KeyError because Context uses exact + type matching on the dict key, not isinstance checks. 
+ """ + + class Base: + pass + + class Derived(Base): + pass + + d = Derived() + ctx = Context({Derived: d}) + assert Base not in ctx + with pytest.raises(KeyError): + ctx[Base] + + +class TestContextLoad: + def test_load_empty_context_list(self) -> None: + with tempfile.TemporaryDirectory() as td: + settings = Settings( + Path(td), + {"tool": {"docc": {"context": []}}}, + ) + + result = list(load(settings)) + assert result == [] + + def test_load_single_context(self) -> None: + with tempfile.TemporaryDirectory() as td: + settings = Settings( + Path(td), + {"tool": {"docc": {"context": ["docc.references.context"]}}}, + ) + + result = list(load(settings)) + assert len(result) == 1 + assert result[0][0] == "docc.references.context" + + def test_load_multiple_contexts(self) -> None: + with tempfile.TemporaryDirectory() as td: + settings = Settings( + Path(td), + { + "tool": { + "docc": { + "context": [ + "docc.references.context", + "docc.search.context", + ] + } + } + }, + ) + + result = list(load(settings)) + assert len(result) == 2 diff --git a/tests/test_debug.py b/tests/test_debug.py new file mode 100644 index 0000000..8de197f --- /dev/null +++ b/tests/test_debug.py @@ -0,0 +1,115 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from io import StringIO +from pathlib import Path + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document, ListNode +from docc.plugins.debug import DebugNode, DebugTransform +from docc.settings import PluginSettings, Settings + + +@pytest.fixture +def plugin_settings() -> PluginSettings: + settings = Settings(Path("."), {"tool": {"docc": {}}}) + return settings.for_plugin("docc.debug.transform") + + +class TestDebugNode: + def test_init(self) -> None: + child = BlankNode() + node = DebugNode(child) + assert node.child is child + + def test_children(self) -> None: + child = BlankNode() + node = DebugNode(child) + children = tuple(node.children) + assert children == (child,) + + def test_replace_child(self) -> None: + old_child = BlankNode() + new_child = BlankNode() + node = DebugNode(old_child) + + node.replace_child(old_child, new_child) + assert node.child is new_child + + def test_replace_child_no_match(self) -> None: + child = BlankNode() + other = BlankNode() + new_child = BlankNode() + node = DebugNode(child) + + node.replace_child(other, new_child) + assert node.child is child + + def test_extension(self) -> None: + node = DebugNode(BlankNode()) + assert node.extension == ".txt" + + def test_output(self) -> None: + child = BlankNode() + node = DebugNode(child) + context = Context({}) + destination = StringIO() + + node.output(context, destination) + + result = destination.getvalue() + assert "" in result + + def test_output_nested(self) -> None: + inner = BlankNode() + outer = ListNode([inner]) + node = DebugNode(outer) + context = Context({}) + destination = StringIO() + + node.output(context, destination) + + result = destination.getvalue() + assert "" in result + assert "" in result + + +class TestDebugTransform: + def test_transform(self, plugin_settings: PluginSettings) -> None: + root = BlankNode() + document = Document(root) + context = Context({Document: document}) + + transform = 
DebugTransform(plugin_settings) + transform.transform(context) + + assert isinstance(document.root, DebugNode) + assert document.root.child is root + + def test_transform_with_nested( + self, plugin_settings: PluginSettings + ) -> None: + inner = BlankNode() + root = ListNode([inner]) + document = Document(root) + context = Context({Document: document}) + + transform = DebugTransform(plugin_settings) + transform.transform(context) + + assert isinstance(document.root, DebugNode) + assert document.root.child is root diff --git a/tests/test_document.py b/tests/test_document.py new file mode 100644 index 0000000..d909df8 --- /dev/null +++ b/tests/test_document.py @@ -0,0 +1,518 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from io import StringIO +from typing import List + +import pytest +from typing_extensions import override + +from docc.document import ( + BlankNode, + Document, + ListNode, + Node, + OutputNode, + Visit, + Visitor, + _ExtensionVisitor, + _StrVisitor, +) + + +class TestBlankNode: + def test_children_returns_empty_tuple(self) -> None: + node = BlankNode() + assert tuple(node.children) == () + + def test_replace_child_raises_type_error(self) -> None: + node = BlankNode() + with pytest.raises(TypeError): + node.replace_child(BlankNode(), BlankNode()) + + def test_repr(self) -> None: + node = BlankNode() + assert repr(node) == "" + + def test_bool_is_false(self) -> None: + node = BlankNode() + assert bool(node) is False + + +class TestListNode: + def test_children_property_returns_list(self) -> None: + first_child, second_child = BlankNode(), BlankNode() + node = ListNode([first_child, second_child]) + assert list(node.children) == [first_child, second_child] + + def test_default_children_is_empty(self) -> None: + node = ListNode() + assert list(node.children) == [] + + def test_replace_child(self) -> None: + old_child = BlankNode() + new_child = BlankNode() + other_child = BlankNode() + node = ListNode([old_child, other_child]) + + node.replace_child(old_child, new_child) + + assert list(node.children) == [new_child, other_child] + + def test_replace_child_when_not_found(self) -> None: + old_child = BlankNode() + new_child = BlankNode() + other_child = BlankNode() + node = ListNode([other_child]) + + node.replace_child(old_child, new_child) + assert list(node.children) == [other_child] + + def test_repr(self) -> None: + node = ListNode() + assert repr(node) == "" + + def test_bool_true_when_has_children(self) -> None: + node = ListNode([BlankNode()]) + assert bool(node) is True + + def test_bool_false_when_empty(self) -> None: + node = ListNode() + assert bool(node) is False + + def test_iter(self) -> None: + children: List[Node] = [BlankNode(), BlankNode()] + node 
= ListNode(children) + assert list(node.children) == children + + def test_len(self) -> None: + node = ListNode([BlankNode(), BlankNode(), BlankNode()]) + assert len(node) == 3 + + +class RecordingVisitor(Visitor): + events: List[str] + + def __init__(self) -> None: + self.events = [] + + @override + def enter(self, node: Node) -> Visit: + self.events.append(f"enter:{repr(node)}") + return Visit.TraverseChildren + + @override + def exit(self, node: Node) -> None: + self.events.append(f"exit:{repr(node)}") + + +class SkippingVisitor(Visitor): + events: List[str] + + def __init__(self) -> None: + self.events = [] + + @override + def enter(self, node: Node) -> Visit: + self.events.append(f"enter:{repr(node)}") + return Visit.SkipChildren + + @override + def exit(self, node: Node) -> None: + self.events.append(f"exit:{repr(node)}") + + +class TestNodeVisit: + def test_visit_single_node(self) -> None: + node = BlankNode() + visitor = RecordingVisitor() + node.visit(visitor) + + assert visitor.events == ["enter:", "exit:"] + + def test_visit_with_children(self) -> None: + first_child = BlankNode() + second_child = BlankNode() + parent = ListNode([first_child, second_child]) + + visitor = RecordingVisitor() + parent.visit(visitor) + + assert visitor.events == [ + "enter:", + "enter:", + "exit:", + "enter:", + "exit:", + "exit:", + ] + + def test_visit_skip_children(self) -> None: + child = BlankNode() + parent = ListNode([child]) + + visitor = SkippingVisitor() + parent.visit(visitor) + + assert visitor.events == ["enter:", "exit:"] + + def test_visit_nested_structure(self) -> None: + leaf = BlankNode() + inner = ListNode([leaf]) + outer = ListNode([inner]) + + visitor = RecordingVisitor() + outer.visit(visitor) + + assert visitor.events == [ + "enter:", + "enter:", + "enter:", + "exit:", + "exit:", + "exit:", + ] + + def test_visit_depth_first(self) -> None: + first_leaf, second_leaf, third_leaf = ( + BlankNode(), + BlankNode(), + BlankNode(), + ) + first_branch = 
ListNode([first_leaf, second_leaf]) + second_branch = ListNode([third_leaf]) + root = ListNode([first_branch, second_branch]) + + visitor = RecordingVisitor() + root.visit(visitor) + + enter_events = [e for e in visitor.events if e.startswith("enter")] + assert enter_events[0] == "enter:" + assert enter_events[1] == "enter:" + assert enter_events[2] == "enter:" + + +class TestNodeDump: + def test_dump_to_stringio(self) -> None: + node = BlankNode() + output = StringIO() + node.dump(file=output) + result = output.getvalue() + assert "" in result + + def test_dumps_returns_string(self) -> None: + node = BlankNode() + result = node.dumps() + assert isinstance(result, str) + assert "" in result + + def test_dump_nested_structure(self) -> None: + child = BlankNode() + parent = ListNode([child]) + + result = parent.dumps() + assert "" in result + assert "" in result + + +class TestStrVisitor: + def test_builds_rich_tree(self) -> None: + node = BlankNode() + visitor = _StrVisitor() + node.visit(visitor) + + assert visitor.root is not None + assert "" in str(visitor.root.label) + + def test_nested_tree(self) -> None: + child = BlankNode() + parent = ListNode([child]) + + visitor = _StrVisitor() + parent.visit(visitor) + + assert visitor.root is not None + + +class TestDocument: + def test_init_with_root(self) -> None: + root = BlankNode() + doc = Document(root) + assert doc.root is root + + def test_extension_returns_none_when_no_output_nodes(self) -> None: + root = BlankNode() + doc = Document(root) + assert doc.extension() is None + + def test_extension_returns_extension_from_output_node(self) -> None: + from io import TextIOBase + + from docc.context import Context + + class TestOutputNode(OutputNode): + @property + def children(self): + return () + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return ".test" + + def output( + self, context: Context, destination: TextIOBase + ) -> None: + pass + + root = 
TestOutputNode() + doc = Document(root) + assert doc.extension() == ".test" + + +class TestExtensionVisitor: + def test_finds_extension(self) -> None: + from io import TextIOBase + + from docc.context import Context + + class TestOutputNode(OutputNode): + @property + def children(self): + return () + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return ".html" + + def output( + self, context: Context, destination: TextIOBase + ) -> None: + pass + + root = TestOutputNode() + visitor = _ExtensionVisitor() + root.visit(visitor) + + assert visitor.extension == ".html" + + def test_returns_none_when_no_output_nodes(self) -> None: + root = BlankNode() + visitor = _ExtensionVisitor() + root.visit(visitor) + + assert visitor.extension is None + + def test_conflicting_extensions_logs_warning( + self, caplog: pytest.LogCaptureFixture + ) -> None: + """ + When two OutputNodes have different extensions, the visitor + logs a warning and keeps the first extension. 
+ """ + import logging + from io import TextIOBase + + from docc.context import Context + + class HtmlOutputNode(OutputNode): + @property + def children(self): + return () + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return ".html" + + def output( + self, context: Context, destination: TextIOBase + ) -> None: + pass + + class TxtOutputNode(OutputNode): + @property + def children(self): + return () + + def replace_child(self, old: Node, new: Node) -> None: + pass + + @property + def extension(self) -> str: + return ".txt" + + def output( + self, context: Context, destination: TextIOBase + ) -> None: + pass + + root = ListNode([HtmlOutputNode(), TxtOutputNode()]) + visitor = _ExtensionVisitor() + + with caplog.at_level(logging.WARNING): + root.visit(visitor) + + # The first extension is kept + assert visitor.extension == ".html" + # A warning was logged about the conflict + assert any("extension" in r.message for r in caplog.records) + + +class ConditionalSkipVisitor(Visitor): + events: List[str] + skip_after_first: bool + + def __init__(self, skip_after_first: bool = False) -> None: + self.events = [] + self.skip_after_first = skip_after_first + self._first_seen = False + + @override + def enter(self, node: Node) -> Visit: + self.events.append(f"enter:{repr(node)}") + if self.skip_after_first and not self._first_seen: + self._first_seen = True + return Visit.TraverseChildren + elif self.skip_after_first: + return Visit.SkipChildren + return Visit.TraverseChildren + + @override + def exit(self, node: Node) -> None: + self.events.append(f"exit:{repr(node)}") + + +class TestVisitorEdgeCases: + def test_visit_empty_list_node(self) -> None: + node = ListNode([]) + visitor = RecordingVisitor() + node.visit(visitor) + + assert visitor.events == ["enter:", "exit:"] + + def test_visit_deeply_nested(self) -> None: + node: Node = BlankNode() + for _ in range(10): + node = ListNode([node]) + + visitor = 
RecordingVisitor() + node.visit(visitor) + + enter_count = sum(1 for e in visitor.events if e.startswith("enter")) + assert enter_count == 11 + + def test_visit_wide_tree(self) -> None: + children: List[Node] = [BlankNode() for _ in range(100)] + node = ListNode(children) + + visitor = RecordingVisitor() + node.visit(visitor) + + blank_enters = sum(1 for e in visitor.events if e == "enter:") + assert blank_enters == 100 + + def test_conditional_skip(self) -> None: + grandchild = BlankNode() + child = ListNode([grandchild]) + parent = ListNode([child]) + + visitor = ConditionalSkipVisitor(skip_after_first=True) + parent.visit(visitor) + + assert "enter:" not in visitor.events + + +class ModifyingVisitor(Visitor): + def __init__(self, old: Node, new: Node) -> None: + self.old = old + self.new = new + self.stack: List[Node] = [] + + @override + def enter(self, node: Node) -> Visit: + self.stack.append(node) + return Visit.TraverseChildren + + @override + def exit(self, node: Node) -> None: + self.stack.pop() + if node == self.old and self.stack: + self.stack[-1].replace_child(self.old, self.new) + + +class SkipSpecificChildVisitor(Visitor): + """Visitor that returns SkipChildren for a specific child node.""" + + enter_calls: List[Node] + exit_calls: List[Node] + skip_target: Node + + def __init__(self, skip_target: Node) -> None: + self.enter_calls = [] + self.exit_calls = [] + self.skip_target = skip_target + + @override + def enter(self, node: Node) -> Visit: + self.enter_calls.append(node) + if node is self.skip_target: + return Visit.SkipChildren + return Visit.TraverseChildren + + @override + def exit(self, node: Node) -> None: + self.exit_calls.append(node) + + +class TestSkipChildrenCallsExit: + def test_skip_children_calls_exit_on_skipped_child(self) -> None: + grandchild = BlankNode() + skipped_child = ListNode([grandchild]) + other_child = BlankNode() + parent = ListNode([skipped_child, other_child]) + + visitor = 
SkipSpecificChildVisitor(skip_target=skipped_child) + parent.visit(visitor) + + # exit must be called for the child that returned SkipChildren + assert skipped_child in visitor.exit_calls + + # The grandchild should NOT have been entered (children were skipped) + assert grandchild not in visitor.enter_calls + + # Both the skipped child and the other child should have exit called + assert other_child in visitor.exit_calls + assert parent in visitor.exit_calls + + +class TestVisitorModification: + def test_replace_during_exit(self) -> None: + old_child = BlankNode() + new_child = BlankNode() + parent = ListNode([old_child]) + + visitor = ModifyingVisitor(old_child, new_child) + parent.visit(visitor) + + assert old_child not in parent.children + assert new_child in parent.children diff --git a/tests/test_files.py b/tests/test_files.py new file mode 100644 index 0000000..a0ff541 --- /dev/null +++ b/tests/test_files.py @@ -0,0 +1,226 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import tempfile +from io import StringIO +from pathlib import Path, PurePath +from typing import Dict, Iterator, Optional, Set + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document +from docc.plugins.files import ( + FileNode, + FilesBuilder, + FilesDiscover, + FileSource, +) +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +@pytest.fixture +def plugin_settings(temp_dir: Path) -> PluginSettings: + settings = Settings(temp_dir, {"tool": {"docc": {}}}) + return settings.for_plugin("docc.files.discover") + + +class TestFileSource: + def test_init(self) -> None: + relative = PurePath("src/file.txt") + absolute = PurePath("/path/to/src/file.txt") + source = FileSource(relative, absolute) + + assert source.relative_path == relative + assert source.absolute_path == absolute + + def test_output_path_removes_suffix(self) -> None: + relative = PurePath("docs/readme.md") + absolute = PurePath("/path/docs/readme.md") + source = FileSource(relative, absolute) + + assert source.output_path == PurePath("docs/readme") + + def test_output_path_no_suffix(self) -> None: + relative = PurePath("docs/readme") + absolute = PurePath("/path/docs/readme") + source = FileSource(relative, absolute) + + assert source.output_path == PurePath("docs/readme") + + +class TestFileNode: + def test_init(self, temp_dir: Path) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("content") + + node = FileNode(file_path) + assert node.path == file_path + + def test_children_empty(self, temp_dir: Path) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("content") + + node = FileNode(file_path) + assert node.children == () + + def test_replace_child_raises(self, temp_dir: Path) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("content") + + node = 
FileNode(file_path) + with pytest.raises(TypeError): + node.replace_child(BlankNode(), BlankNode()) + + def test_extension(self, temp_dir: Path) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("content") + + node = FileNode(file_path) + assert node.extension == ".txt" + + def test_extension_multiple_suffixes(self, temp_dir: Path) -> None: + file_path = temp_dir / "archive.tar.gz" + file_path.write_text("content") + + node = FileNode(file_path) + assert node.extension == ".gz" + + def test_output(self, temp_dir: Path) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("file content here") + + node = FileNode(file_path) + context = Context({}) + destination = StringIO() + + node.output(context, destination) + + assert destination.getvalue() == "file content here" + + +class TestFilesBuilder: + def test_build_processes_file_sources( + self, temp_dir: Path, plugin_settings: PluginSettings + ) -> None: + file_path = temp_dir / "test.txt" + file_path.write_text("content") + + source = FileSource(PurePath("test.txt"), file_path) + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = FilesBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert len(unprocessed) == 0 + assert source in processed + assert isinstance(processed[source].root, FileNode) + + def test_build_ignores_non_file_sources( + self, plugin_settings: PluginSettings + ) -> None: + class OtherSource(Source): + @property + def relative_path(self) -> Optional[PurePath]: + return PurePath("other.py") + + @property + def output_path(self) -> PurePath: + return PurePath("other.py") + + source = OtherSource() + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = FilesBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert source in unprocessed + assert len(processed) == 0 + + +class TestFilesDiscover: + def test_init_no_files(self, temp_dir: Path) -> None: + 
settings = Settings(temp_dir, {"tool": {"docc": {}}}) + plugin_settings = settings.for_plugin("docc.files.discover") + + discover = FilesDiscover(plugin_settings) + assert discover.sources == [] + + def test_init_with_files(self, temp_dir: Path) -> None: + first_file = temp_dir / "file1.txt" + first_file.write_text("content1") + second_file = temp_dir / "file2.txt" + second_file.write_text("content2") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.files.discover": { + "files": ["file1.txt", "file2.txt"] + } + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.files.discover") + + discover = FilesDiscover(plugin_settings) + assert len(discover.sources) == 2 + + def test_discover_yields_sources(self, temp_dir: Path) -> None: + first_file = temp_dir / "file1.txt" + first_file.write_text("content") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.files.discover": {"files": ["file1.txt"]} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.files.discover") + + discover = FilesDiscover(plugin_settings) + sources = list(discover.discover(frozenset())) + + assert len(sources) == 1 + assert isinstance(sources[0], FileSource) + + def test_discover_empty_when_no_files(self, temp_dir: Path) -> None: + settings = Settings(temp_dir, {"tool": {"docc": {}}}) + plugin_settings = settings.for_plugin("docc.files.discover") + + discover = FilesDiscover(plugin_settings) + sources = list(discover.discover(frozenset())) + + assert sources == [] diff --git a/tests/test_html.py b/tests/test_html.py new file mode 100644 index 0000000..84b682e --- /dev/null +++ b/tests/test_html.py @@ -0,0 +1,679 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later 
version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import tempfile +from io import StringIO +from pathlib import Path, PurePath +from typing import Iterator, Optional +from unittest.mock import patch + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document, ListNode, Node, Visit +from docc.plugins.html import ( + HTML, + HTMLContext, + HTMLDiscover, + HTMLParser, + HTMLRoot, + HTMLTag, + HTMLTransform, + HTMLVisitor, + TextNode, + _ElementTreeVisitor, + _make_relative, + blank_node, + html_tag, + list_node, + render_reference, + text_node, +) +from docc.plugins.loader import PluginError +from docc.plugins.references import Index, Reference +from docc.settings import PluginSettings, Settings, SettingsError +from docc.source import Source + + +class MockSource(Source): + _output_path: PurePath + + def __init__(self, output_path: Optional[PurePath] = None) -> None: + self._output_path = ( + output_path if output_path is not None else PurePath("test.py") + ) + + @property + def relative_path(self) -> Optional[PurePath]: + return self._output_path + + @property + def output_path(self) -> PurePath: + return self._output_path + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +@pytest.fixture +def basic_settings(temp_dir: Path) -> Settings: + return Settings(temp_dir, {"tool": {"docc": {}}}) + + +@pytest.fixture +def plugin_settings(basic_settings: Settings) -> PluginSettings: + return basic_settings.for_plugin("docc.html") + + +class TestTextNode: + def test_init_with_value(self) -> None: + node = TextNode("hello") + assert node._value == 
"hello" + + def test_children_empty(self) -> None: + node = TextNode("test") + assert tuple(node.children) == () + + def test_replace_child_raises(self) -> None: + node = TextNode("test") + with pytest.raises(TypeError, match="text nodes have no children"): + node.replace_child(BlankNode(), BlankNode()) + + def test_repr(self) -> None: + node = TextNode("hello world") + assert repr(node) == "'hello world'" + + +class TestHTMLTag: + def test_init_basic(self) -> None: + tag = HTMLTag("div") + assert tag.tag_name == "div" + assert tag.attributes == {} + assert list(tag.children) == [] + + def test_init_with_attributes(self) -> None: + tag = HTMLTag("a", {"href": "/test", "class": "link"}) + assert tag.attributes["href"] == "/test" + assert tag.attributes["class"] == "link" + + def test_append_child(self) -> None: + parent = HTMLTag("div") + child = HTMLTag("span") + parent.append(child) + assert child in parent.children + + def test_append_text(self) -> None: + tag = HTMLTag("p") + text = TextNode("hello") + tag.append(text) + assert text in tag.children + + def test_replace_child(self) -> None: + parent = HTMLTag("div") + old = HTMLTag("span") + new = HTMLTag("p") + parent.append(old) + + parent.replace_child(old, new) + assert old not in parent.children + assert new in parent.children + + def test_repr_basic(self) -> None: + tag = HTMLTag("div") + assert repr(tag) == "
" + + def test_repr_with_attributes(self) -> None: + tag = HTMLTag("a", {"href": "/test"}) + result = repr(tag) + assert " None: + tag = HTMLTag("div", {"data-value": 'test"quote'}) + result = repr(tag) + assert """ in result + + def test_repr_attribute_without_value(self) -> None: + tag = HTMLTag("input", {"disabled": None}) + result = repr(tag) + assert "disabled" in result + + +class TestHTMLRoot: + def test_init(self) -> None: + context = Context({}) + root = HTMLRoot(context) + assert list(root.children) == [] + + def test_append(self) -> None: + context = Context({}) + root = HTMLRoot(context) + tag = HTMLTag("div") + root.append(tag) + assert tag in root.children + + def test_replace_child(self) -> None: + context = Context({}) + root = HTMLRoot(context) + old = HTMLTag("div") + new = HTMLTag("span") + root.append(old) + + root.replace_child(old, new) + assert old not in root.children + assert new in root.children + + def test_extension(self) -> None: + context = Context({}) + root = HTMLRoot(context) + assert root.extension == ".html" + + +class TestHTMLContext: + def test_provides_html_type(self) -> None: + assert HTMLContext.provides() == HTML + + def test_init_default_values( + self, plugin_settings: PluginSettings + ) -> None: + ctx = HTMLContext(plugin_settings) + html = ctx.provide() + assert html.extra_css == [] + assert html.breadcrumbs is True + + def test_init_with_extra_css(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.html.context": { + "extra_css": ["custom.css", "theme.css"] + } + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.html.context") + ctx = HTMLContext(plugin_settings) + html = ctx.provide() + assert html.extra_css == ["custom.css", "theme.css"] + + def test_init_invalid_extra_css_raises(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": {"docc.html.context": {"extra_css": [123]}} + } + } + 
}, + ) + plugin_settings = settings.for_plugin("docc.html.context") + with pytest.raises(SettingsError, match="extra_css"): + HTMLContext(plugin_settings) + + def test_init_breadcrumbs_false(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.html.context": {"breadcrumbs": False} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.html.context") + ctx = HTMLContext(plugin_settings) + html = ctx.provide() + assert html.breadcrumbs is False + + def test_init_invalid_breadcrumbs_raises(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.html.context": {"breadcrumbs": "yes"} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.html.context") + with pytest.raises(SettingsError, match="breadcrumbs"): + HTMLContext(plugin_settings) + + +class TestHTMLDiscover: + def test_discover_yields_static_resources( + self, plugin_settings: PluginSettings + ) -> None: + discover = HTMLDiscover(plugin_settings) + sources = list(discover.discover(frozenset())) + + assert len(sources) == 4 + + output_paths = [str(s.output_path) for s in sources] + assert any("chota" in p for p in output_paths) + assert any("docc" in p for p in output_paths) + assert any("fuse" in p for p in output_paths) + assert any("search" in p for p in output_paths) + + +class TestHTMLParser: + def test_parse_simple_tag(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed("
hello
") + + children = list(parser.root.children) + assert len(children) == 1 + child = children[0] + assert isinstance(child, HTMLTag) + assert child.tag_name == "div" + + def test_parse_nested_tags(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed("
text
") + + children = list(parser.root.children) + assert len(children) == 1 + div = children[0] + assert isinstance(div, HTMLTag) + + span = list(div.children)[0] + assert isinstance(span, HTMLTag) + assert span.tag_name == "span" + + def test_parse_with_attributes(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed('click') + + children = list(parser.root.children) + anchor = children[0] + assert isinstance(anchor, HTMLTag) + assert anchor.attributes["href"] == "/test" + assert anchor.attributes["class"] == "link" + + def test_parse_text_content(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed("

hello world

") + + children = list(parser.root.children) + p = children[0] + assert isinstance(p, HTMLTag) + text_children = list(p.children) + assert len(text_children) == 1 + text_child = text_children[0] + assert isinstance(text_child, TextNode) + assert text_child._value == "hello world" + + def test_parse_multiple_elements(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed("

one

two

") + + children = list(parser.root.children) + assert len(children) == 2 + + +class TestElementTreeVisitor: + def test_basic_tag(self) -> None: + tag = HTMLTag("div") + visitor = _ElementTreeVisitor() + tag.visit(visitor) + element = visitor.builder.close() + + assert element.tag == "div" + + def test_tag_with_attributes(self) -> None: + tag = HTMLTag("a", {"href": "/test"}) + visitor = _ElementTreeVisitor() + tag.visit(visitor) + element = visitor.builder.close() + + assert element.attrib["href"] == "/test" + + def test_nested_tags(self) -> None: + parent = HTMLTag("div") + child = HTMLTag("span") + parent.append(child) + + visitor = _ElementTreeVisitor() + parent.visit(visitor) + element = visitor.builder.close() + + assert element.tag == "div" + assert len(list(element)) == 1 + assert list(element)[0].tag == "span" + + def test_text_node(self) -> None: + tag = HTMLTag("p") + tag.append(TextNode("hello")) + + visitor = _ElementTreeVisitor() + tag.visit(visitor) + element = visitor.builder.close() + + assert element.text == "hello" + + +class TestMakeRelative: + def test_same_path_returns_none(self) -> None: + path = PurePath("a/b/c") + result = _make_relative(path, path) + assert result is None + + def test_sibling_file(self) -> None: + from_path = PurePath("a/b/c.html") + to_path = PurePath("a/b/d.html") + result = _make_relative(from_path, to_path) + assert result == PurePath("d.html") + + def test_parent_directory(self) -> None: + from_path = PurePath("a/b/c.html") + to_path = PurePath("a/d.html") + result = _make_relative(from_path, to_path) + assert result == PurePath("../d.html") + + def test_deeper_directory(self) -> None: + from_path = PurePath("a/b.html") + to_path = PurePath("a/c/d.html") + result = _make_relative(from_path, to_path) + assert result == PurePath("c/d.html") + + +class TestRenderFunctions: + def test_blank_node_returns_none(self) -> None: + context = Context({}) + parent = HTMLRoot(context) + blank = BlankNode() + + result = 
blank_node(context, parent, blank) + assert result is None + + def test_list_node_returns_parent(self) -> None: + context = Context({}) + parent = HTMLRoot(context) + node = ListNode() + + result = list_node(context, parent, node) + assert result is parent + + def test_html_tag_appends_to_parent(self) -> None: + context = Context({}) + parent = HTMLRoot(context) + tag = HTMLTag("div") + + result = html_tag(context, parent, tag) + assert result is None + assert tag in parent.children + + def test_text_node_appends_to_parent(self) -> None: + context = Context({}) + parent = HTMLRoot(context) + text = TextNode("hello") + + result = text_node(context, parent, text) + assert result is None + assert text in parent.children + + +class TestHTMLTransform: + def test_transform_skips_output_nodes( + self, plugin_settings: PluginSettings + ) -> None: + context_obj = Context({}) + root = HTMLRoot(context_obj) + document = Document(root) + context = Context({Document: document}) + + transform = HTMLTransform(plugin_settings) + transform.transform(context) + + assert context[Document].root is root + + +class TestHTMLEdgeCases: + def test_html_tag_none_attribute_value(self) -> None: + tag = HTMLTag("input", {"disabled": None, "type": "text"}) + result = repr(tag) + assert "disabled" in result + assert 'type="text"' in result + + def test_deeply_nested_html(self) -> None: + context = Context({}) + parser = HTMLParser(context) + html = "
deep
" + parser.feed(html) + + children = list(parser.root.children) + assert len(children) == 1 + + def test_html_with_special_characters(self) -> None: + context = Context({}) + parser = HTMLParser(context) + parser.feed("

<script>

") + + children = list(parser.root.children) + p = children[0] + assert isinstance(p, HTMLTag) + text_children = list(p.children) + text_child = text_children[0] + assert isinstance(text_child, TextNode) + assert "") + assert text._value == "" + + +class TestHTMLVisitorTraversal: + def test_visitor_traversal_blank_node(self) -> None: + context = Context({}) + visitor = HTMLVisitor(context) + + blank = BlankNode() + blank.visit(visitor) + + assert len(visitor.stack) == 1 + assert visitor.stack[0] is visitor.root diff --git a/tests/test_integration.py b/tests/test_integration.py new file mode 100644 index 0000000..dc0e4f4 --- /dev/null +++ b/tests/test_integration.py @@ -0,0 +1,1001 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import tempfile +from pathlib import Path, PurePath +from typing import Dict, Iterator, List, Optional, Set, Tuple, Type + +import libcst +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document, ListNode, Node, Visit, Visitor +from docc.plugins.mistletoe import ( + DocstringTransform, + MarkdownNode, + ReferenceTransform, + _SearchVisitor, +) +from docc.plugins.python import nodes +from docc.plugins.python.cst import ( + PythonBuilder, + PythonDiscover, + PythonTransform, +) +from docc.plugins.references import ( + Definition, + Index, + IndexTransform, + Reference, +) +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +class MockSource(Source): + _output_path: PurePath + + def __init__(self, output_path: Optional[PurePath] = None) -> None: + self._output_path = ( + output_path if output_path is not None else PurePath("test.py") + ) + + @property + def relative_path(self) -> Optional[PurePath]: + return self._output_path + + @property + def output_path(self) -> PurePath: + return self._output_path + + +class NodeCollector(Visitor): + """Visitor that collects all nodes in the tree for easy assertion.""" + + def __init__(self) -> None: + self.all_nodes: List[Node] = [] + self.by_type: Dict[Type[Node], List[Node]] = {} + + def enter(self, node: Node) -> Visit: + self.all_nodes.append(node) + node_type = type(node) + if node_type not in self.by_type: + self.by_type[node_type] = [] + self.by_type[node_type].append(node) + return Visit.TraverseChildren + + def exit(self, node: Node) -> None: + pass + + def get(self, node_type: Type[Node]) -> List[Node]: + return self.by_type.get(node_type, []) + + def get_names(self, node_type: Type[Node]) -> List[str]: + """Extract name strings from nodes that have a name attribute.""" + result = [] + for node in self.get(node_type): + 
name_node = getattr(node, "name", None) + if isinstance(name_node, nodes.Name): + result.append(name_node.name) + return result + + +def _run_pipeline( + temp_dir: Path, + py_content: str, + filename: str = "example.py", +) -> Tuple[Document, Source, Context]: + """Helper: write source, discover, build, and transform.""" + (temp_dir / filename).write_text(py_content) + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]}, + "docc.python.transform": {}, + } + } + } + }, + ) + + discover_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(discover_settings) + sources: Set[Source] = set(discover.discover(frozenset())) + + builder = PythonBuilder(discover_settings) + documents: Dict[Source, Document] = {} + builder.build(sources, documents) + + document = list(documents.values())[0] + source = list(documents.keys())[0] + index = Index() + + context = Context({Document: document, Source: source, Index: index}) + + transform_settings = settings.for_plugin("docc.python.transform") + transform = PythonTransform(transform_settings) + transform.transform(context) + + return document, source, context + + +class TestPythonPipeline: + def test_discover_build_transform_pipeline(self, temp_dir: Path) -> None: + py_content = '''"""Module docstring.""" + +class MyClass: + """A test class.""" + + def method(self, x: int) -> str: + """A method.""" + return str(x) + + +def standalone_func(arg: str) -> None: + """Standalone function.""" + pass +''' + (temp_dir / "example.py").write_text(py_content) + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]}, + "docc.python.transform": {}, + } + } + } + }, + ) + + discover_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(discover_settings) + sources: Set[Source] = set(discover.discover(frozenset())) + assert len(sources) == 
1 + + builder = PythonBuilder(discover_settings) + documents: Dict[Source, Document] = {} + builder.build(sources, documents) + assert len(documents) == 1 + + document = list(documents.values())[0] + source = list(documents.keys())[0] + index = Index() + + context = Context({Document: document, Source: source, Index: index}) + + transform_settings = settings.for_plugin("docc.python.transform") + transform = PythonTransform(transform_settings) + transform.transform(context) + + # Verify the tree contains Module, Class, Function nodes + class NodeTypeChecker(Visitor): + found_module = False + found_class = False + found_function = False + + def enter(self, node: Node) -> Visit: + if isinstance(node, nodes.Module): + self.found_module = True + elif isinstance(node, nodes.Class): + self.found_class = True + elif isinstance(node, nodes.Function): + self.found_function = True + return Visit.TraverseChildren + + def exit(self, node: Node) -> None: + pass + + checker = NodeTypeChecker() + document.root.visit(checker) + assert checker.found_module, "Should contain a Module node" + assert checker.found_class, "Should contain a Class node" + assert checker.found_function, "Should contain a Function" + + # Strengthened assertions: verify names and structure + collector = NodeCollector() + document.root.visit(collector) + + # Module should have a name derived from the filename + modules = collector.get(nodes.Module) + assert len(modules) == 1 + module = modules[0] + assert isinstance(module, nodes.Module) + assert isinstance(module.name, nodes.Name) + + # Class should be named "MyClass" + class_names = collector.get_names(nodes.Class) + assert ( + "MyClass" in class_names + ), f"Expected 'MyClass' in class names, got {class_names}" + + # Functions should include "method" and "standalone_func" + func_names = collector.get_names(nodes.Function) + assert ( + "method" in func_names + ), f"Expected 'method' in function names, got {func_names}" + assert ( + "standalone_func" in 
def test_python_with_docstring_transform(self, temp_dir: Path) -> None:
    """Check that docstrings become searchable ``MarkdownNode`` objects."""
    py_content = '''"""
Module with **markdown** docstring.
"""

def func():
    """Function with *emphasis*."""
    pass
'''
    (temp_dir / "markdown_example.py").write_text(py_content)

    plugins = {"docc.python.discover": {"paths": [str(temp_dir)]}}
    settings = Settings(temp_dir, {"tool": {"docc": {"plugins": plugins}}})
    python_settings = settings.for_plugin("docc.python.discover")

    sources: Set[Source] = set(
        PythonDiscover(python_settings).discover(frozenset())
    )
    documents: Dict[Source, Document] = {}
    PythonBuilder(python_settings).build(sources, documents)

    source, document = list(documents.items())[0]
    context = Context({Document: document, Source: source, Index: Index()})

    # Python transform first, then the docstring conversion on its output.
    PythonTransform(settings.for_plugin("docc.python.transform")).transform(
        context
    )
    DocstringTransform(
        settings.for_plugin("docc.mistletoe.transform")
    ).transform(context)

    # Verify docstrings were converted to MarkdownNode
    class MarkdownSpotter(Visitor):
        found_markdown = False

        def enter(self, node: Node) -> Visit:
            if not isinstance(node, MarkdownNode):
                return Visit.TraverseChildren
            self.found_markdown = True
            return Visit.TraverseChildren

        def exit(self, node: Node) -> None:
            pass

    spotter = MarkdownSpotter()
    document.root.visit(spotter)
    assert (
        spotter.found_markdown
    ), "Docstrings should be converted to MarkdownNode"

    # Collect the markdown nodes themselves to inspect their content.
    collector = NodeCollector()
    document.root.visit(collector)
    markdown_nodes = collector.get(MarkdownNode)
    assert (
        len(markdown_nodes) >= 1
    ), "Should have at least one MarkdownNode"

    def searchable_text(candidate: Node) -> List[str]:
        """Return the node's search text as a 0- or 1-element list."""
        assert isinstance(candidate, MarkdownNode)
        content = candidate.to_search()
        return [content] if isinstance(content, str) else []

    # Only string search results take part in the combined text.
    pieces: List[str] = []
    for md_node in markdown_nodes:
        pieces.extend(searchable_text(md_node))
    combined_text = " ".join(pieces)
    lowered = combined_text.lower()

    assert (
        "markdown" in lowered
    ), f"Expected 'markdown' in search text, got: {combined_text}"
    assert (
        "emphasis" in lowered
    ), f"Expected 'emphasis' in search text, got: {combined_text}"
class TestIndexTransformIntegration:
    """Checks that ``IndexTransform`` registers the definitions of a tree."""

    def test_indexes_definitions_in_tree(self) -> None:
        """Two distinct identifiers each get specifier 0 and one location."""
        identifiers = ("module.ClassA", "module.func_b")
        definitions = tuple(
            Definition(identifier=identifier) for identifier in identifiers
        )
        # ListNode receives its own list, as in a hand-built tree.
        document = Document(ListNode(list(definitions)))

        source = MockSource()
        index = Index()
        context = Context({Document: document, Source: source, Index: index})

        plugin_settings = PluginSettings(Settings(Path("."), {}), {})
        IndexTransform(plugin_settings).transform(context)

        # Specifiers are checked first, then the index lookups.
        for definition in definitions:
            assert definition.specifier == 0

        for identifier in identifiers:
            locations = list(index.lookup(identifier))
            assert len(locations) == 1
class TestMarkdownNode:
    """Unit tests for the ``MarkdownNode`` wrapper around mistletoe."""

    @staticmethod
    def _parse(markdown: str) -> MarkdownNode:
        """Wrap the parsed ``markdown`` text in a ``MarkdownNode``."""
        import mistletoe as md

        return MarkdownNode(md.Document(markdown))

    def test_children_lazy_loaded(self) -> None:
        """Accessing ``children`` yields at least one child node."""
        node = self._parse("Test **bold**")

        assert list(node.children)

    def test_replace_child(self) -> None:
        """A replaced child shows up among the node's children."""
        node = self._parse("Test **bold**")
        old_child = list(node.children)[0]
        new_child = BlankNode()

        node.replace_child(old_child, new_child)

        assert new_child in list(node.children)

    def test_to_search(self) -> None:
        """The search representation contains the document text."""
        node = self._parse("Searchable text here")

        assert "Searchable" in node.to_search()

    def test_search_children_returns_false(self) -> None:
        """``search_children`` returns exactly ``False``."""
        node = self._parse("Test")

        assert node.search_children() is False

    def test_repr(self) -> None:
        """The repr names both the node class and the wrapped document."""
        text = repr(self._parse("Test"))

        assert "MarkdownNode" in text
        assert "Document" in text
class TestEdgeCases:
    """Edge cases for Python discovery and building."""

    @staticmethod
    def _discover(
        temp_dir: Path,
    ) -> Tuple[Settings, PythonBuilder, Set[Source]]:
        """
        Discover the Python sources under ``temp_dir``.

        Returns the settings, a builder that has not been run yet, and the
        discovered sources, so each test decides how to invoke ``build``
        (for example inside ``pytest.raises``).
        """
        settings = Settings(
            temp_dir,
            {
                "tool": {
                    "docc": {
                        "plugins": {
                            "docc.python.discover": {"paths": [str(temp_dir)]},
                        }
                    }
                }
            },
        )
        python_settings = settings.for_plugin("docc.python.discover")
        discover = PythonDiscover(python_settings)
        sources: Set[Source] = set(discover.discover(frozenset()))
        builder = PythonBuilder(python_settings)
        return settings, builder, sources

    def test_empty_python_file(self, temp_dir: Path) -> None:
        """An empty file builds one document holding a bare Module."""
        (temp_dir / "empty.py").write_text("")

        settings, builder, sources = self._discover(temp_dir)
        documents: Dict[Source, Document] = {}
        builder.build(sources, documents)

        assert len(documents) == 1
        source, document = next(iter(documents.items()))

        # Traversal itself is the check: iterating the root's children must
        # not raise. (Asserting a length ``>= 0`` would be vacuous.)
        list(document.root.children)

        # Transform the empty file to get the output node types
        index = Index()
        context = Context({Document: document, Source: source, Index: index})
        transform = PythonTransform(
            settings.for_plugin("docc.python.transform")
        )
        transform.transform(context)

        # After transform, should have a Module node
        collector = NodeCollector()
        document.root.visit(collector)
        modules = collector.get(nodes.Module)
        assert (
            len(modules) == 1
        ), "Empty file should produce exactly one Module"
        assert isinstance(modules[0], nodes.Module)

        # Empty module should have no class/function/attribute members
        absent = (
            (nodes.Class, "Empty module should have no classes"),
            (nodes.Function, "Empty module should have no functions"),
            (nodes.Attribute, "Empty module should have no attributes"),
            (nodes.Docstring, "Empty module should have no docstrings"),
        )
        for node_type, message in absent:
            assert len(collector.get(node_type)) == 0, message

    def test_python_with_syntax_error_handled(self, temp_dir: Path) -> None:
        """Building a file with a syntax error raises ParserSyntaxError."""
        (temp_dir / "syntax_error.py").write_text("def broken(\n")

        _, builder, sources = self._discover(temp_dir)
        documents: Dict[Source, Document] = {}

        with pytest.raises(libcst.ParserSyntaxError):
            builder.build(sources, documents)

    def test_nested_classes(self, temp_dir: Path) -> None:
        """Outer class, inner class, and inner method reach the built tree."""
        from docc.plugins.python.cst import CstNode

        py_content = '''"""Module."""

class Outer:
    """Outer class."""

    class Inner:
        """Inner class."""

        def method(self) -> None:
            """Inner method."""
            pass
'''
        (temp_dir / "nested.py").write_text(py_content)

        _, builder, sources = self._discover(temp_dir)
        documents: Dict[Source, Document] = {}
        builder.build(sources, documents)

        assert len(documents) == 1

        # Verify nested class structure was captured. This inspects the
        # built (untransformed) tree, so it looks at the wrapped CST nodes.
        document = next(iter(documents.values()))

        class NestedClassChecker(Visitor):
            found_outer = False
            found_inner = False
            found_method = False

            def enter(self, node: Node) -> Visit:
                if isinstance(node, CstNode):
                    cst = node.cst_node
                    if isinstance(cst, libcst.ClassDef):
                        if cst.name.value == "Outer":
                            self.found_outer = True
                        elif cst.name.value == "Inner":
                            self.found_inner = True
                    elif isinstance(cst, libcst.FunctionDef):
                        if cst.name.value == "method":
                            self.found_method = True
                return Visit.TraverseChildren

            def exit(self, node: Node) -> None:
                pass

        checker = NestedClassChecker()
        document.root.visit(checker)

        assert checker.found_outer, "Outer class should be found"
        assert checker.found_inner, "Inner class should be found"
        assert checker.found_method, "Inner method should be found"
def test_class_with_attributes(self, temp_dir: Path) -> None:
    """Annotated class attributes must come out as Attribute nodes."""
    py_content = '''"""Module."""

class Config:
    """Configuration class."""

    timeout: int
    """Timeout in seconds."""

    name: str
'''
    document, _, _ = _run_pipeline(
        temp_dir, py_content, filename="config.py"
    )

    collector = NodeCollector()
    document.root.visit(collector)

    def is_config(candidate: Node) -> bool:
        """Tell whether ``candidate`` is the Class node named "Config"."""
        if not isinstance(candidate, nodes.Class):
            return False
        if not isinstance(candidate.name, nodes.Name):
            return False
        return candidate.name.name == "Config"

    # Exactly one class called "Config" is expected in the tree.
    config_classes = list(filter(is_config, collector.get(nodes.Class)))
    assert (
        len(config_classes) == 1
    ), "Should have exactly one class named 'Config'"
    (config_class,) = config_classes
    assert isinstance(config_class, nodes.Class)

    # The class must expose a non-empty member list.
    assert isinstance(config_class.members, ListNode)
    assert (
        len(list(config_class.members.children)) >= 1
    ), "Config should have members"

    # Attribute nodes are looked up anywhere in the tree.
    attributes = collector.get(nodes.Attribute)
    assert len(attributes) >= 1, "Should have at least one Attribute node"

    # Scan for the first attribute carrying a Docstring ("timeout").
    attr_with_docstring = None
    remaining = iter(attributes)
    while attr_with_docstring is None:
        attr = next(remaining, None)
        if attr is None:
            break
        assert isinstance(attr, nodes.Attribute)
        if isinstance(attr.docstring, nodes.Docstring):
            attr_with_docstring = attr

    assert (
        attr_with_docstring is not None
    ), "At least one attribute should have a Docstring child"
    assert isinstance(attr_with_docstring.docstring, nodes.Docstring)
    assert "Timeout in seconds" in attr_with_docstring.docstring.text, (
        f"Expected 'Timeout in seconds' in docstring, "
        f"got: {attr_with_docstring.docstring.text}"
    )
tempfile.TemporaryDirectory() as td2: + doc2, _, _ = _run_pipeline(Path(td2), py_content) + collector2 = NodeCollector() + doc2.root.visit(collector2) + + # Both runs should produce the same sequence of node types + types1 = [type(n).__name__ for n in collector1.all_nodes] + types2 = [type(n).__name__ for n in collector2.all_nodes] + assert types1 == types2, ( + f"Tree structure differs between runs:\n" + f" Run 1: {types1}\n" + f" Run 2: {types2}" + ) + + # Both runs should produce the same node names in the same order + def extract_names(collector: NodeCollector) -> List[str]: + result = [] + for node in collector.all_nodes: + if isinstance(node, nodes.Name): + result.append(node.name) + return result + + names1 = extract_names(collector1) + names2 = extract_names(collector2) + assert names1 == names2, ( + f"Name nodes differ between runs:\n" + f" Run 1: {names1}\n" + f" Run 2: {names2}" + ) + + # Both runs should produce the same docstrings in the same order + docstrings1 = [ + d.text + for d in collector1.get(nodes.Docstring) + if isinstance(d, nodes.Docstring) + ] + docstrings2 = [ + d.text + for d in collector2.get(nodes.Docstring) + if isinstance(d, nodes.Docstring) + ] + assert docstrings1 == docstrings2, ( + f"Docstrings differ between runs:\n" + f" Run 1: {docstrings1}\n" + f" Run 2: {docstrings2}" + ) diff --git a/tests/test_listing.py b/tests/test_listing.py new file mode 100644 index 0000000..1aefd22 --- /dev/null +++ b/tests/test_listing.py @@ -0,0 +1,342 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
class MockSource(Source):
    """
    Minimal ``Source`` whose paths are supplied by the test.

    When no output path is given, the relative path is reused; when
    neither is given, the output path is ``PurePath("output")``.
    """

    _relative_path: Optional[PurePath]
    _output_path: PurePath

    def __init__(
        self,
        relative_path: Optional[PurePath] = None,
        output_path: Optional[PurePath] = None,
    ) -> None:
        # First truthy candidate wins: explicit output, relative, fallback.
        if output_path:
            resolved = output_path
        elif relative_path:
            resolved = relative_path
        else:
            resolved = PurePath("output")

        self._relative_path = relative_path
        self._output_path = resolved

    @property
    def relative_path(self) -> Optional[PurePath]:
        """The relative path given at construction (may be ``None``)."""
        return self._relative_path

    @property
    def output_path(self) -> PurePath:
        """The output path resolved at construction."""
        return self._output_path
class TestListingBuilder:
    """``ListingBuilder`` consumes listing sources and nothing else."""

    def test_build_processes_listing_sources(
        self, plugin_settings: PluginSettings
    ) -> None:
        """A ``ListingSource`` moves from unprocessed to processed."""
        listing = ListingSource(
            PurePath("docs"), PurePath("docs/index"), set()
        )
        pending: Set[Source] = {listing}
        done: Dict[Source, Document] = {}

        ListingBuilder(plugin_settings).build(pending, done)

        assert len(pending) == 0
        assert listing in done
        # The document built for a listing is rooted at a ListingNode.
        assert isinstance(done[listing].root, ListingNode)

    def test_build_ignores_non_listing_sources(
        self, plugin_settings: PluginSettings
    ) -> None:
        """Sources of other types are left untouched in the input set."""
        other = MockSource(PurePath("test.py"))
        pending: Set[Source] = {other}
        done: Dict[Source, Document] = {}

        ListingBuilder(plugin_settings).build(pending, done)

        assert other in pending
        assert len(done) == 0
def test_discover_creates_nested_listings(
    self, plugin_settings: PluginSettings
) -> None:
    """Every ancestor directory of a nested source gets one listing."""
    known_source = MockSource(PurePath("a/b/c/module.py"))
    known: frozenset[Source] = frozenset([known_source])

    discover = ListingDiscover(plugin_settings)
    sources = list(discover.discover(known))

    # "a/b/c/module.py" has parents "a/b/c", "a/b", "a" and ".", so exactly
    # 4 listings are expected (same counting as the single-directory
    # tests). An exact count also catches duplicated listings.
    assert (
        len(sources) == 4
    ), "Should create listings for 'a', 'a/b', 'a/b/c' and root"
    assert all(isinstance(s, ListingSource) for s in sources)

    # Verify specific directory listings were created
    paths = {s.relative_path for s in sources}
    assert PurePath("a") in paths, "Should create listing for 'a'"
    assert PurePath("a/b") in paths, "Should create listing for 'a/b'"
    assert PurePath("a/b/c") in paths, "Should create listing for 'a/b/c'"
def test_discover_includes_listable_shown(
    self, plugin_settings: PluginSettings
) -> None:
    """A Listable source with ``show=True`` appears in its listing."""
    shown_source = ListableSource(PurePath("shown/file.py"), show=True)
    known: frozenset[Source] = frozenset([shown_source])

    sources = list(ListingDiscover(plugin_settings).discover(known))

    # "shown/file.py" has parents "shown" and ".", so 2 listings
    assert len(sources) == 2, "Should create listings for 'shown' and root"

    # Pick the first listing created for the 'shown' directory, if any.
    shown_listing = None
    for candidate in sources:
        if candidate.relative_path == PurePath("shown"):
            shown_listing = candidate
            break

    assert (
        shown_listing is not None
    ), "Should create listing for 'shown' directory"
    # The listing must reference the source that caused it.
    assert (
        shown_source in shown_listing.sources
    ), "Shown source should be in listing"
class TestRenderHtml:
    """Rendering a ``ListingNode`` into an HTML parent tag."""

    def test_render_html_produces_links(self) -> None:
        """Check that render_html emits at least one ``.html`` link."""
        entry_source = MockSource(
            relative_path=PurePath("docs/api/module.py"),
            output_path=PurePath("docs/api/module"),
        )
        listing_sources: Set[Source] = {entry_source}
        listing_source = ListingSource(
            PurePath("docs"),
            PurePath("docs/index"),
            listing_sources,
        )
        listing_node = ListingNode(listing_sources)
        parent = HTMLTag("div")

        render_html(Context({Source: listing_source}), parent, listing_node)

        # Parent should now contain children from the template
        assert (
            len(list(parent.children)) > 0
        ), "render_html should append children to parent"

        def find_tags(root: object, tag_name: str) -> List[HTMLTag]:
            """Collect HTMLTags named ``tag_name`` in document order."""
            matches: List[HTMLTag] = []
            pending: List[object] = [root]
            while pending:
                current = pending.pop()
                if not isinstance(current, HTMLTag):
                    continue
                if current.tag_name == tag_name:
                    matches.append(current)
                # Reversed so the first child is popped (visited) first.
                pending.extend(reversed(list(current.children)))
            return matches

        links = find_tags(parent, "a")
        assert len(links) >= 1, "Should produce at least one link"

        # The link should have an href attribute ending with .html
        href = links[0].attributes.get("href") or ""
        assert href.endswith(
            ".html"
        ), f"Link href should end with .html, got: {href}"
loader = Loader() + cls = loader.load(Transform, "docc.python.transform") + assert isinstance(cls, type) + assert issubclass(cls, Transform) + + def test_load_nonexistent_plugin_raises(self) -> None: + loader = Loader() + with pytest.raises(KeyError): + loader.load(Discover, "nonexistent.plugin") + + def test_load_abstract_class_raises_plugin_error(self) -> None: + loader = Loader() + # Inject a fake entry point that loads an abstract class + mock_ep = MagicMock() + mock_ep.load.return_value = Discover + loader.entry_points["fake.abstract"] = mock_ep + + with pytest.raises(PluginError, match="is abstract"): + loader.load(Discover, "fake.abstract") + + def test_load_wrong_subclass_raises_plugin_error(self) -> None: + loader = Loader() + # Inject a fake entry point that loads a class not subclassing the base + + class NotADiscover: + pass + + mock_ep = MagicMock() + mock_ep.load.return_value = NotADiscover + loader.entry_points["fake.wrong_type"] = mock_ep + + with pytest.raises(PluginError, match="is not a subclass of"): + loader.load(Discover, "fake.wrong_type") + + +class TestPluginError: + def test_create_error(self) -> None: + error = PluginError("test error message") + assert "test error message" in str(error) + + def test_error_inheritance(self) -> None: + error = PluginError("test") + assert isinstance(error, Exception) + + +class TestLoaderMultiplePlugins: + def test_load_multiple_transforms(self) -> None: + loader = Loader() + + transforms = [ + "docc.python.transform", + "docc.mistletoe.transform", + "docc.mistletoe.reference", + "docc.html.transform", + ] + + for name in transforms: + cls = loader.load(Transform, name) + assert isinstance(cls, type) + assert issubclass(cls, Transform) + + def test_load_multiple_discovers(self) -> None: + loader = Loader() + + discovers = [ + "docc.python.discover", + "docc.html.discover", + ] + + for name in discovers: + cls = loader.load(Discover, name) + assert isinstance(cls, type) + assert issubclass(cls, Discover) 
diff --git a/tests/test_mistletoe_comprehensive.py b/tests/test_mistletoe_comprehensive.py new file mode 100644 index 0000000..c6ca1c2 --- /dev/null +++ b/tests/test_mistletoe_comprehensive.py @@ -0,0 +1,831 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from unittest.mock import MagicMock + +import mistletoe as md +import pytest +from conftest import ReferenceChecker +from mistletoe import block_token as blocks +from mistletoe import span_token as spans + +from docc.context import Context +from docc.document import BlankNode, ListNode, Visit +from docc.plugins.html import HTMLRoot, HTMLTag, TextNode +from docc.plugins.mistletoe import ( + MarkdownNode, + _DocstringVisitor, + _ReferenceVisitor, + _render_auto_link, + _render_block_code, + _render_document, + _render_emphasis, + _render_escape_sequence, + _render_heading, + _render_html_block, + _render_html_span, + _render_image, + _render_inline_code, + _render_line_break, + _render_link, + _render_list, + _render_list_item, + _render_paragraph, + _render_quote, + _render_raw_text, + _render_strikethrough, + _render_strong, + _render_table, + _render_table_cell, + _render_table_row, + _render_thematic_break, + _SearchVisitor, + render_html, +) +from docc.plugins.python import nodes + + +@pytest.fixture +def context() -> Context: + return Context({}) + + +@pytest.fixture +def html_root(context: Context) -> 
HTMLRoot: + return HTMLRoot(context) + + +class TestMarkdownNodeComprehensive: + def test_repr(self) -> None: + markdown = "test" + node = MarkdownNode(md.Document(markdown)) + result = repr(node) + assert "MarkdownNode" in result + assert "Document" in result + + def test_replace_child(self) -> None: + markdown = "**bold**" + node = MarkdownNode(md.Document(markdown)) + + children = list(node.children) + # "**bold**" produces a Document with one Paragraph child + assert len(children) == 1 + old = children[0] + new = BlankNode() + node.replace_child(old, new) + + new_children = list(node.children) + assert new in new_children + + def test_search_children_returns_false(self) -> None: + node = MarkdownNode(md.Document("test")) + assert node.search_children() is False + + def test_to_search_returns_text(self) -> None: + node = MarkdownNode(md.Document("hello world")) + result = node.to_search() + assert "hello world" in result + + +class TestRenderStrong: + def test_creates_strong_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "**bold**" + doc = md.Document(markdown) + para = doc.children[0] + strong_token = para.children[0] + node = MarkdownNode(strong_token) + + result = _render_strong(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "strong" + children = list(html_root.children) + assert result in children + + +class TestRenderEmphasis: + def test_creates_em_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "*italic*" + doc = md.Document(markdown) + para = doc.children[0] + em_token = para.children[0] + node = MarkdownNode(em_token) + + result = _render_emphasis(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "em" + + +class TestRenderInlineCode: + def test_creates_code_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "`code`" + doc = md.Document(markdown) + para = doc.children[0] + code_token 
= para.children[0] + node = MarkdownNode(code_token) + + result = _render_inline_code(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "code" + + +class TestRenderRawText: + def test_creates_text_node( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "plain text" + doc = md.Document(markdown) + para = doc.children[0] + text_token = para.children[0] + node = MarkdownNode(text_token) + + result = _render_raw_text(context, html_root, node) + + assert result is None + children = list(html_root.children) + text_nodes = [c for c in children if isinstance(c, TextNode)] + assert len(text_nodes) == 1 + assert text_nodes[0]._value == "plain text" + + +class TestRenderStrikethrough: + def test_creates_del_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "~~strikethrough~~" + doc = md.Document(markdown) + para = doc.children[0] + strike_token = para.children[0] + node = MarkdownNode(strike_token) + + result = _render_strikethrough(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "del" + + +class TestRenderImage: + def test_creates_img_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=spans.Image) + mock_token.src = "image.png" + mock_token.content = "alt text" + mock_token.title = "" + node = MarkdownNode(mock_token) + + result = _render_image(context, html_root, node) + + assert result is None + children = list(html_root.children) + img = next( + c + for c in children + if isinstance(c, HTMLTag) and c.tag_name == "img" + ) + assert img.attributes["src"] == "image.png" + assert img.attributes["alt"] == "alt text" + + def test_img_with_title( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=spans.Image) + mock_token.src = "image.png" + mock_token.content = "alt" + mock_token.title = "title" + node = MarkdownNode(mock_token) + + _render_image(context, 
html_root, node) + + children = list(html_root.children) + img = next( + c + for c in children + if isinstance(c, HTMLTag) and c.tag_name == "img" + ) + assert img.attributes.get("title") == "title" + + +class TestRenderLink: + def test_creates_anchor_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "[link](http://example.com)" + doc = md.Document(markdown) + para = doc.children[0] + link_token = para.children[0] + node = MarkdownNode(link_token) + + result = _render_link(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "a" + assert result.attributes.get("href") == "http://example.com" + + def test_link_with_title( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = '[link](http://example.com "title")' + doc = md.Document(markdown) + para = doc.children[0] + link_token = para.children[0] + node = MarkdownNode(link_token) + + result = _render_link(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.attributes.get("title") == "title" + + +class TestRenderAutoLink: + def test_creates_anchor_for_url( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "" + doc = md.Document(markdown) + para = doc.children[0] + auto_token = para.children[0] + node = MarkdownNode(auto_token) + + result = _render_auto_link(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "a" + + def test_mailto_prefix_for_email( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "" + doc = md.Document(markdown) + para = doc.children[0] + auto_token = para.children[0] + node = MarkdownNode(auto_token) + + result = _render_auto_link(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert "mailto:" in (result.attributes.get("href") or "") + + +class TestRenderEscapeSequence: + def test_raises_not_implemented( + self, context: Context, html_root: HTMLRoot + ) -> None: + node = 
MarkdownNode(md.Document("test")) + + with pytest.raises(NotImplementedError): + _render_escape_sequence(context, html_root, node) + + +class TestRenderHeading: + def test_creates_h1_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "# Heading 1" + doc = md.Document(markdown) + heading_token = doc.children[0] + node = MarkdownNode(heading_token) + + result = _render_heading(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "h1" + + def test_creates_h2_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "## Heading 2" + doc = md.Document(markdown) + heading_token = doc.children[0] + node = MarkdownNode(heading_token) + + result = _render_heading(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "h2" + + def test_creates_h3_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "### Heading 3" + doc = md.Document(markdown) + heading_token = doc.children[0] + node = MarkdownNode(heading_token) + + result = _render_heading(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "h3" + + +class TestRenderQuote: + def test_creates_blockquote_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "> quoted text" + doc = md.Document(markdown) + quote_token = doc.children[0] + node = MarkdownNode(quote_token) + + result = _render_quote(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "blockquote" + + +class TestRenderParagraph: + def test_creates_p_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "paragraph text" + doc = md.Document(markdown) + para_token = doc.children[0] + node = MarkdownNode(para_token) + + result = _render_paragraph(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "p" + + +class TestRenderBlockCode: + def 
test_creates_pre_code_tags( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "```\ncode block\n```" + doc = md.Document(markdown) + code_token = doc.children[0] + node = MarkdownNode(code_token) + + result = _render_block_code(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "code" + children = list(html_root.children) + pre = children[0] + assert isinstance(pre, HTMLTag) + assert pre.tag_name == "pre" + + +class TestRenderList: + def test_creates_ul_for_unordered( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "- item 1\n- item 2" + doc = md.Document(markdown) + list_token = doc.children[0] + node = MarkdownNode(list_token) + + result = _render_list(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "ul" + + def test_creates_ol_for_ordered( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "1. item 1\n2. item 2" + doc = md.Document(markdown) + list_token = doc.children[0] + node = MarkdownNode(list_token) + + result = _render_list(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "ol" + + def test_ol_with_custom_start( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "5. item 1\n6. 
item 2" + doc = md.Document(markdown) + list_token = doc.children[0] + node = MarkdownNode(list_token) + + result = _render_list(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.attributes.get("start") == 5 + + +class TestRenderListItem: + def test_creates_li_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "- item" + doc = md.Document(markdown) + list_token = doc.children[0] + item_token = list_token.children[0] + node = MarkdownNode(item_token) + + result = _render_list_item(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "li" + + +class TestRenderTable: + def test_creates_table_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A | B |\n|---|---|\n| 1 | 2 |" + doc = md.Document(markdown) + table_token = doc.children[0] + node = MarkdownNode(table_token) + + result = _render_table(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "table" + + def test_table_with_header_creates_thead( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A | B |\n|---|---|\n| 1 | 2 |" + doc = md.Document(markdown) + table_token = doc.children[0] + node = MarkdownNode(table_token) + + result = _render_table(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "table" + # Table with header row should have a child + thead_children = [ + c + for c in result.children + if isinstance(c, HTMLTag) and c.tag_name == "thead" + ] + assert ( + len(thead_children) == 1 + ), "Table with header row should have a child element" + + +class TestRenderTableRow: + def test_creates_tr_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A | B |\n|---|---|\n| 1 | 2 |" + doc = md.Document(markdown) + table_token = doc.children[0] + row_token = table_token.children[0] + node = MarkdownNode(row_token) + + result = _render_table_row(context, 
html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "tr" + + +class TestRenderTableCell: + def test_creates_td_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A | B |\n|---|---|\n| 1 | 2 |" + doc = md.Document(markdown) + table_token = doc.children[0] + row_token = table_token.children[0] + cell_token = row_token.children[0] + node = MarkdownNode(cell_token) + + result = _render_table_cell(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "td" + + def test_default_alignment_is_left( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A |\n|---|\n| 1 |" + doc = md.Document(markdown) + table_token = doc.children[0] + row_token = table_token.children[0] + cell_token = row_token.children[0] + node = MarkdownNode(cell_token) + + result = _render_table_cell(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.attributes.get("align") == "left" + + def test_center_alignment( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "| A |\n|:---:|\n| 1 |" + doc = md.Document(markdown) + table_token = doc.children[0] + row_token = table_token.children[0] + cell_token = row_token.children[0] + node = MarkdownNode(cell_token) + + result = _render_table_cell(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.attributes.get("align") == "center" + + def test_right_alignment( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=blocks.TableCell) + mock_token.align = 2 + node = MarkdownNode(mock_token) + + result = _render_table_cell(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.attributes.get("align") == "right" + + def test_unknown_alignment_raises_not_implemented( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=blocks.TableCell) + mock_token.align = 99 + node = 
MarkdownNode(mock_token) + + with pytest.raises(NotImplementedError, match="table alignment 99"): + _render_table_cell(context, html_root, node) + + +class TestRenderThematicBreak: + def test_creates_hr_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "---" + doc = md.Document(markdown) + hr_token = doc.children[0] + node = MarkdownNode(hr_token) + + result = _render_thematic_break(context, html_root, node) + + assert result is None + children = list(html_root.children) + assert any( + isinstance(c, HTMLTag) and c.tag_name == "hr" for c in children + ) + + +class TestRenderLineBreak: + def test_hard_break_creates_br_tag( + self, context: Context, html_root: HTMLRoot + ) -> None: + # Two trailing spaces before newline creates a hard break + markdown = "line1 \nline2" + doc = md.Document(markdown) + para = doc.children[0] + + break_token = None + for child in para.children: + if isinstance(child, spans.LineBreak): + break_token = child + break + + assert break_token is not None, "Expected a LineBreak token" + assert ( + not break_token.soft + ), "Two trailing spaces should produce a hard break" + + node = MarkdownNode(break_token) + result = _render_line_break(context, html_root, node) + + assert result is None + children = list(html_root.children) + br_tags = [ + c + for c in children + if isinstance(c, HTMLTag) and c.tag_name == "br" + ] + assert len(br_tags) == 1, "Hard break should append an HTMLTag('br')" + + def test_soft_break_creates_text_newline( + self, context: Context, html_root: HTMLRoot + ) -> None: + # No trailing spaces before newline creates a soft break + markdown = "line1\nline2" + doc = md.Document(markdown) + para = doc.children[0] + + break_token = None + for child in para.children: + if isinstance(child, spans.LineBreak): + break_token = child + break + + assert break_token is not None, "Expected a LineBreak token" + assert ( + break_token.soft + ), "No trailing spaces should produce a soft break" + + node = 
MarkdownNode(break_token) + result = _render_line_break(context, html_root, node) + + assert result is None + children = list(html_root.children) + text_nodes = [c for c in children if isinstance(c, TextNode)] + assert ( + len(text_nodes) == 1 + ), "Soft break should append a TextNode('\\n')" + assert text_nodes[0]._value == "\n" + + +class TestRenderHtmlSpan: + def test_parses_inline_html( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=spans.HTMLSpan) + mock_token.content = "hello" + node = MarkdownNode(mock_token) + + result = _render_html_span(context, html_root, node) + + assert result is None + children = list(html_root.children) + # "hello" produces exactly one tag + assert len(children) == 1 + em_tag = children[0] + assert isinstance(em_tag, HTMLTag) + assert em_tag.tag_name == "em" + + +class TestRenderHtmlBlock: + def test_parses_block_html( + self, context: Context, html_root: HTMLRoot + ) -> None: + mock_token = MagicMock(spec=blocks.HTMLBlock) + mock_token.content = "
<div>block content</div>
" + node = MarkdownNode(mock_token) + + result = _render_html_block(context, html_root, node) + + assert result is None + children = list(html_root.children) + # "
<div>block content</div>
" produces exactly one <div>
tag + assert len(children) == 1 + div_tag = children[0] + assert isinstance(div_tag, HTMLTag) + assert div_tag.tag_name == "div" + + +class TestRenderDocument: + def test_creates_div_with_class( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "document content" + doc = md.Document(markdown) + node = MarkdownNode(doc) + + result = _render_document(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "div" + assert result.attributes.get("class") == "markdown" + + +class TestRenderHtml: + def test_dispatches_to_correct_renderer( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "plain text" + doc = md.Document(markdown) + node = MarkdownNode(doc) + + result = render_html(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "div" + + def test_renders_paragraph( + self, context: Context, html_root: HTMLRoot + ) -> None: + markdown = "paragraph" + doc = md.Document(markdown) + para = doc.children[0] + node = MarkdownNode(para) + + result = render_html(context, html_root, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "p" + + +class TestDocstringVisitorEdgeCases: + def test_docstring_at_root_becomes_markdown(self) -> None: + visitor = _DocstringVisitor() + docstring = nodes.Docstring("**bold** text") + + docstring.visit(visitor) + + assert visitor.root is not None + assert isinstance(visitor.root, MarkdownNode) + + def test_nested_docstrings(self) -> None: + visitor = _DocstringVisitor() + inner_doc = nodes.Docstring("inner") + outer = ListNode([inner_doc]) + + outer.visit(visitor) + + children = list(outer.children) + assert len(children) == 1 + assert isinstance(children[0], MarkdownNode) + + +class TestReferenceVisitorEdgeCases: + def test_autolink_with_ref_becomes_reference(self) -> None: + visitor = _ReferenceVisitor() + markdown = "" + root = MarkdownNode(md.Document(markdown)) + + root.visit(visitor) + + checker = 
ReferenceChecker() + assert visitor.root is not None + visitor.root.visit(checker) + assert ( + checker.found + ), "Autolink with ref: prefix should become Reference" + + def test_link_with_multiple_children(self) -> None: + visitor = _ReferenceVisitor() + markdown = "[**bold** text](ref:test)" + root = MarkdownNode(md.Document(markdown)) + + root.visit(visitor) + + checker = ReferenceChecker() + assert visitor.root is not None + visitor.root.visit(checker) + assert ( + checker.found + ), "Link with multiple children should become Reference" + + def test_link_without_ref_prefix_unchanged(self) -> None: + visitor = _ReferenceVisitor() + markdown = "[link](https://example.com)" + root = MarkdownNode(md.Document(markdown)) + + root.visit(visitor) + + assert isinstance( + visitor.root, MarkdownNode + ), "Root should remain MarkdownNode" + checker = ReferenceChecker() + assert visitor.root is not None + visitor.root.visit(checker) + assert ( + not checker.found + ), "Regular links should not become Reference nodes" + + +class TestSearchVisitorEdgeCases: + def test_enter_returns_traverse_children_for_non_markdown(self) -> None: + visitor = _SearchVisitor() + blank = BlankNode() + + result = visitor.enter(blank) + + assert result == Visit.TraverseChildren + + def test_exit_does_nothing(self) -> None: + visitor = _SearchVisitor() + blank = BlankNode() + + visitor.exit(blank) + + def test_raw_text_extraction(self) -> None: + visitor = _SearchVisitor() + markdown = "plain text content" + node = MarkdownNode(md.Document(markdown)) + + node.visit(visitor) + + assert "plain text content" in visitor.texts + + def test_to_search_joins_multiple_fragments_with_spaces(self) -> None: + # Two paragraphs produce separate RawText tokens: "bold" and + # "new paragraph". With " ".join() these become + # "bold new paragraph"; with "".join() they would become + # "boldnew paragraph". 
+ node = MarkdownNode(md.Document("**bold**\n\nnew paragraph")) + result = node.to_search() + + assert "bold new" in result diff --git a/tests/test_mistletoe_extended.py b/tests/test_mistletoe_extended.py new file mode 100644 index 0000000..362c9d0 --- /dev/null +++ b/tests/test_mistletoe_extended.py @@ -0,0 +1,269 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import typing +from pathlib import Path + +import mistletoe as md +import pytest +from conftest import ReferenceChecker +from mistletoe.token import Token as MarkdownToken + +from docc.context import Context +from docc.document import BlankNode, Document, ListNode +from docc.plugins.mistletoe import ( + DocstringTransform, + MarkdownNode, + ReferenceTransform, + _DocstringVisitor, + _ReferenceVisitor, + _SearchVisitor, +) +from docc.plugins.python import nodes +from docc.plugins.references import Reference +from docc.settings import PluginSettings, Settings + + +@pytest.fixture +def plugin_settings() -> PluginSettings: + settings = Settings(Path("."), {"tool": {"docc": {}}}) + return settings.for_plugin("docc.mistletoe.transform") + + +class TestDocstringVisitor: + def test_enter_non_docstring_node(self) -> None: + visitor = _DocstringVisitor() + blank = BlankNode() + + visitor.enter(blank) + assert visitor.root is blank + + def test_exit_non_docstring_node(self) -> None: + visitor = 
_DocstringVisitor() + blank = BlankNode() + + visitor.enter(blank) + visitor.exit(blank) + + assert len(visitor.stack) == 0 + + def test_transforms_docstring_to_markdown(self) -> None: + visitor = _DocstringVisitor() + docstring = nodes.Docstring("Test **bold**") + parent = ListNode([docstring]) + + parent.visit(visitor) + + children = list(parent.children) + assert len(children) == 1 + assert not isinstance(children[0], nodes.Docstring) + + +class TestReferenceVisitor: + def test_enter_non_markdown_node(self) -> None: + visitor = _ReferenceVisitor() + blank = BlankNode() + + visitor.enter(blank) + assert visitor.root is blank + + def test_exit_non_markdown_node(self) -> None: + visitor = _ReferenceVisitor() + blank = BlankNode() + + visitor.enter(blank) + visitor.exit(blank) + + assert len(visitor.stack) == 0 + + def test_transforms_ref_link_to_reference(self) -> None: + visitor = _ReferenceVisitor() + markdown = "[text](ref:identifier)" + root = MarkdownNode(md.Document(markdown)) + + root.visit(visitor) + + checker = ReferenceChecker() + assert visitor.root is not None + visitor.root.visit(checker) + assert ( + checker.found + ), "ref: link should be transformed to Reference node" + + def test_ignores_non_ref_links(self) -> None: + visitor = _ReferenceVisitor() + markdown = "[text](https://example.com)" + root = MarkdownNode(md.Document(markdown)) + + root.visit(visitor) + + checker = ReferenceChecker() + assert visitor.root is not None + visitor.root.visit(checker) + assert ( + not checker.found + ), "Regular links should not become Reference nodes" + + def test_ref_link_becomes_root_when_only_element(self) -> None: + visitor = _ReferenceVisitor() + markdown = "[ref](ref:test)" + token = md.Document(markdown) + paragraph = token.children[0] + link_token = paragraph.children[0] + link_node = MarkdownNode(link_token) + link_node.visit(visitor) + + assert isinstance( + visitor.root, Reference + ), "Single ref link should become Reference root" + + +class 
TestSearchVisitor: + def test_collect_empty(self) -> None: + result = _SearchVisitor.collect([]) + assert result == [] + + def test_collect_blank_node(self) -> None: + blank = BlankNode() + result = _SearchVisitor.collect(blank) + assert result == [] + + def test_collect_single_node(self) -> None: + markdown = "Hello world" + node = MarkdownNode(md.Document(markdown)) + result = _SearchVisitor.collect(node) + assert "Hello world" in " ".join(result) + + def test_collect_multiple_nodes(self) -> None: + nodes_list = [ + MarkdownNode(md.Document("First")), + MarkdownNode(md.Document("Second")), + ] + result = _SearchVisitor.collect(nodes_list) + combined = " ".join(result) + assert "First" in combined + assert "Second" in combined + + +class TestDocstringTransform: + def test_transform_simple_docstring( + self, plugin_settings: PluginSettings + ) -> None: + docstring = nodes.Docstring("A simple docstring") + root = ListNode([docstring]) + document = Document(root) + context = Context({Document: document}) + + transform = DocstringTransform(plugin_settings) + transform.transform(context) + + children = list(context[Document].root.children) + assert not isinstance(children[0], nodes.Docstring) + + +class TestReferenceTransform: + def test_transform_creates_references( + self, plugin_settings: PluginSettings + ) -> None: + markdown = "[link](ref:test.module)" + root = MarkdownNode(md.Document(markdown)) + document = Document(root) + context = Context({Document: document}) + + transform = ReferenceTransform(plugin_settings) + transform.transform(context) + + checker = ReferenceChecker() + context[Document].root.visit(checker) + assert ( + checker.found + ), "Transform should create Reference nodes from ref: links" + + +class TestMarkdownNodeChildren: + def test_children_with_token_children_none(self) -> None: + class MockToken: + children = None + + node = MarkdownNode(typing.cast(MarkdownToken, MockToken())) + children = list(node.children) + assert children == [] + + 
def test_children_lazy_evaluation(self) -> None: + markdown = "Test **bold**" + node = MarkdownNode(md.Document(markdown)) + + assert node._children is None + + children = list(node.children) + + assert node._children is not None + assert len(children) > 0 + + +class TestMarkdownFormats: + def test_strong_text(self) -> None: + markdown = "**strong**" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "strong" in result + + def test_emphasis_text(self) -> None: + markdown = "*emphasis*" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "emphasis" in result + + def test_code_block(self) -> None: + markdown = "```python\ncode\n```" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "code" in result, "Code block content should be searchable" + + def test_list_items(self) -> None: + markdown = "- item 1\n- item 2" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "item 1" in result + assert "item 2" in result + + def test_heading(self) -> None: + markdown = "# Heading" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "Heading" in result + + def test_link_with_text(self) -> None: + markdown = "[link text](http://example.com)" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "link text" in result + + def test_mixed_content(self) -> None: + markdown = """ +# Title + +Paragraph with **bold** and *italic*. 
+ +- List item +- Another item + +[Link](http://example.com) +""" + node = MarkdownNode(md.Document(markdown)) + result = node.to_search() + assert "Title" in result + assert "bold" in result + assert "List item" in result diff --git a/tests/test_python_cst.py b/tests/test_python_cst.py new file mode 100644 index 0000000..91a060a --- /dev/null +++ b/tests/test_python_cst.py @@ -0,0 +1,505 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +import tempfile +from collections.abc import Iterator +from pathlib import Path, PurePath +from typing import Dict, Mapping, Set + +import pytest + +from docc.document import BlankNode, Document, ListNode +from docc.plugins.python import nodes +from docc.plugins.python.cst import ( + PythonBuilder, + PythonDiscover, + PythonSource, +) +from docc.settings import Settings +from docc.source import Source + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +@pytest.fixture +def settings_with_paths(temp_dir: Path) -> Settings: + settings_dict: Dict[str, object] = { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]}, + } + } + } + } + return Settings(temp_dir, settings_dict) + + +class TestPythonDiscover: + def test_init_raises_on_non_sequence_paths(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": {"docc.python.discover": {"paths": 123}} + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + + with pytest.raises(TypeError, match="paths must be a list"): + PythonDiscover(plugin_settings) + + def test_init_raises_on_non_string_path(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": {"docc.python.discover": {"paths": [123]}} + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + + with pytest.raises( + TypeError, match="every python path must be a string" + ): + PythonDiscover(plugin_settings) + + def test_init_raises_on_empty_paths(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": {"docc.python.discover": {"paths": []}} + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + + with pytest.raises(ValueError, match="python needs at least one path"): + PythonDiscover(plugin_settings) + + def 
test_discover_finds_python_files(self, temp_dir: Path) -> None: + (temp_dir / "test.py").write_text("# test") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(plugin_settings) + + sources = list(discover.discover(frozenset())) + assert len(sources) == 1 + assert isinstance(sources[0], PythonSource) + + def test_discover_finds_nested_python_files(self, temp_dir: Path) -> None: + subdir = temp_dir / "subdir" + subdir.mkdir() + (subdir / "nested.py").write_text("# nested") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(plugin_settings) + + sources = list(discover.discover(frozenset())) + assert len(sources) == 1 + assert "nested.py" in str(sources[0].relative_path) + + def test_excluded_paths(self, temp_dir: Path) -> None: + subdir = temp_dir / "exclude_me" + subdir.mkdir() + (subdir / "test.py").write_text("# excluded") + (temp_dir / "keep.py").write_text("# keep") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": { + "paths": [str(temp_dir)], + "excluded_paths": ["exclude_me"], + } + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(plugin_settings) + + sources = list(discover.discover(frozenset())) + assert len(sources) == 1 + assert "keep.py" in str(sources[0].relative_path) + + def test_excluded_paths_non_sequence_raises(self, temp_dir: Path) -> None: + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": { + "paths": [str(temp_dir)], + "excluded_paths": 123, + } + } + } + } + }, + ) + plugin_settings = 
settings.for_plugin("docc.python.discover") + + with pytest.raises(TypeError, match="excluded paths must be a list"): + PythonDiscover(plugin_settings) + + +class TestPythonSource: + def test_relative_path_property(self, temp_dir: Path) -> None: + relative = PurePath("test.py") + absolute = temp_dir / "test.py" + absolute.write_text("# test") + + source = PythonSource(temp_dir, relative, absolute) + assert source.relative_path == relative + + def test_output_path_property(self, temp_dir: Path) -> None: + relative = PurePath("subdir") / "test.py" + absolute = temp_dir / "subdir" / "test.py" + absolute.parent.mkdir(exist_ok=True) + absolute.write_text("# test") + + source = PythonSource(temp_dir, relative, absolute) + assert source.output_path == relative + + def test_open_returns_file_handle(self, temp_dir: Path) -> None: + content = "# test content\nx = 1" + relative = PurePath("test.py") + absolute = temp_dir / "test.py" + absolute.write_text(content) + + source = PythonSource(temp_dir, relative, absolute) + with source.open() as f: + assert f.read() == content + + +class TestPythonBuilder: + def test_build_simple_module(self, temp_dir: Path) -> None: + content = '''"""Module docstring.""" +x = 1 +''' + (temp_dir / "test.py").write_text(content) + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]} + } + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(plugin_settings) + sources = set(discover.discover(frozenset())) + + builder = PythonBuilder(plugin_settings) + documents: Dict[Source, Document] = {} + builder.build(sources, documents) + + assert len(documents) == 1 + + def test_build_removes_sources_from_unprocessed( + self, temp_dir: Path + ) -> None: + (temp_dir / "test.py").write_text("x = 1") + + settings = Settings( + temp_dir, + { + "tool": { + "docc": { + "plugins": { + "docc.python.discover": {"paths": [str(temp_dir)]} + 
} + } + } + }, + ) + plugin_settings = settings.for_plugin("docc.python.discover") + discover = PythonDiscover(plugin_settings) + sources: Set[Source] = set(discover.discover(frozenset())) + original_count = len(sources) + + builder = PythonBuilder(plugin_settings) + documents: Dict[Source, Document] = {} + builder.build(sources, documents) + + assert len(sources) == 0 + assert len(documents) == original_count + + +class TestPythonNodes: + def test_module_default_fields(self) -> None: + module = nodes.Module() + assert isinstance(module.name, BlankNode) + assert isinstance(module.docstring, BlankNode) + assert isinstance(module.members, ListNode) + + def test_module_children(self) -> None: + module = nodes.Module() + children = list(module.children) + assert len(children) == 3 + + def test_module_to_search(self) -> None: + module = nodes.Module() + module.name = nodes.Name("test_module") + result = module.to_search() + assert isinstance(result, Mapping) + assert result["type"] == "module" + assert "test_module" in result["name"] + + def test_class_default_fields(self) -> None: + cls = nodes.Class() + assert isinstance(cls.decorators, ListNode) + assert isinstance(cls.name, BlankNode) + assert isinstance(cls.bases, ListNode) + assert isinstance(cls.metaclass, BlankNode) + assert isinstance(cls.docstring, BlankNode) + assert isinstance(cls.members, ListNode) + + def test_class_to_search(self) -> None: + cls = nodes.Class() + cls.name = nodes.Name("TestClass") + result = cls.to_search() + assert isinstance(result, Mapping) + assert result["type"] == "class" + assert "TestClass" in result["name"] + + def test_function_default_fields(self) -> None: + func = nodes.Function(asynchronous=False) + assert func.asynchronous is False + assert isinstance(func.decorators, ListNode) + assert isinstance(func.name, BlankNode) + assert isinstance(func.parameters, ListNode) + assert isinstance(func.return_type, BlankNode) + assert isinstance(func.docstring, BlankNode) + assert 
isinstance(func.body, BlankNode) + + def test_function_async(self) -> None: + func = nodes.Function(asynchronous=True) + assert func.asynchronous is True + + def test_function_to_search(self) -> None: + func = nodes.Function(asynchronous=False) + func.name = nodes.Name("test_func") + result = func.to_search() + assert isinstance(result, Mapping) + assert result["type"] == "function" + assert "test_func" in result["name"] + + def test_parameter(self) -> None: + param = nodes.Parameter() + assert param.star is None + assert isinstance(param.name, BlankNode) + assert isinstance(param.type_annotation, BlankNode) + + def test_parameter_with_star(self) -> None: + param = nodes.Parameter(star="*") + assert param.star == "*" + + double_star_param = nodes.Parameter(star="**") + assert double_star_param.star == "**" + + def test_attribute_to_search(self) -> None: + attr = nodes.Attribute() + attr.names = ListNode([nodes.Name("test_attr")]) + result = attr.to_search() + assert isinstance(result, Mapping) + assert result["type"] == "attribute" + assert "test_attr" in result["name"] + + def test_name_children_empty(self) -> None: + name = nodes.Name("test") + assert tuple(name.children) == () + + def test_name_replace_child_raises(self) -> None: + name = nodes.Name("test") + with pytest.raises(TypeError): + name.replace_child(BlankNode(), BlankNode()) + + def test_name_with_full_name(self) -> None: + name = nodes.Name("test", "module.test") + assert name.name == "test" + assert name.full_name == "module.test" + + def test_docstring_children_empty(self) -> None: + doc = nodes.Docstring("test docstring") + assert tuple(doc.children) == () + + def test_docstring_replace_child_raises(self) -> None: + doc = nodes.Docstring("test") + with pytest.raises(TypeError): + doc.replace_child(BlankNode(), BlankNode()) + + def test_docstring_to_search(self) -> None: + doc = nodes.Docstring("This is documentation") + assert doc.to_search() == "This is documentation" + + def test_type_node(self) 
-> None: + type_node = nodes.Type() + assert isinstance(type_node.child, BlankNode) + + def test_subscript_node(self) -> None: + sub = nodes.Subscript() + assert isinstance(sub.name, BlankNode) + assert isinstance(sub.generics, BlankNode) + + def test_binary_operation(self) -> None: + binop = nodes.BinaryOperation() + assert isinstance(binop.left, BlankNode) + assert isinstance(binop.operator, BlankNode) + assert isinstance(binop.right, BlankNode) + + def test_bit_or(self) -> None: + bit_or = nodes.BitOr() + children = list(bit_or.children) + assert len(children) == 0 + + def test_list_node(self) -> None: + list_node = nodes.List() + assert isinstance(list_node.elements, ListNode) + + def test_tuple_node(self) -> None: + tuple_node = nodes.Tuple() + assert isinstance(tuple_node.elements, ListNode) + + def test_access_node(self) -> None: + access = nodes.Access() + assert isinstance(access.value, BlankNode) + assert isinstance(access.attribute, BlankNode) + + +class TestPythonNodeRepr: + def test_module_repr(self) -> None: + module = nodes.Module() + assert repr(module) == "Module(...)" + + def test_class_repr(self) -> None: + cls = nodes.Class() + assert repr(cls) == "Class(...)" + + def test_function_repr(self) -> None: + func = nodes.Function(asynchronous=False) + assert repr(func) == "Function(...)" + + +class TestPythonNodeReplaceChild: + def test_replace_child_in_module(self) -> None: + old_name = nodes.Name("old") + new_name = nodes.Name("new") + module = nodes.Module() + module.name = old_name + + module.replace_child(old_name, new_name) + assert module.name == new_name + + def test_replace_child_not_found(self) -> None: + module = nodes.Module() + original_name = module.name + original_docstring = module.docstring + original_members = module.members + + old = nodes.Name("old") + new = nodes.Name("new") + + module.replace_child(old, new) + + # Verify original field values are unchanged after no-op replace + assert module.name is original_name + assert 
module.docstring is original_docstring + assert module.members is original_members + + +class TestNameVisitor: + def test_collect_single_name(self) -> None: + name = nodes.Name("test") + result = nodes._NameVisitor.collect(name) + assert result == ["test"] + + def test_collect_multiple_names(self) -> None: + names = [nodes.Name("a"), nodes.Name("b"), nodes.Name("c")] + result = nodes._NameVisitor.collect(names) + assert result == ["a", "b", "c"] + + def test_collect_from_list_node(self) -> None: + list_node = ListNode([nodes.Name("x"), nodes.Name("y")]) + result = nodes._NameVisitor.collect(list_node) + assert result == ["x", "y"] + + def test_collect_empty(self) -> None: + blank = BlankNode() + result = nodes._NameVisitor.collect(blank) + assert result == [] + + +class TestPythonNodeChildrenTypeError: + def test_children_raises_type_error_for_non_node_field(self) -> None: + """ + PythonNode.children raises TypeError when a field annotated + as Node contains a non-Node value. This documents the + defensive contract in nodes.py:44. + """ + module = nodes.Module() + # Forcefully set a Node-typed field to a non-Node value + object.__setattr__(module, "name", "not a node") + + with pytest.raises(TypeError, match="child not Node"): + list(module.children) diff --git a/tests/test_references.py b/tests/test_references.py new file mode 100644 index 0000000..604ecd3 --- /dev/null +++ b/tests/test_references.py @@ -0,0 +1,386 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import tempfile +from pathlib import Path, PurePath +from typing import Iterator, Optional + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document, ListNode +from docc.plugins.references import ( + Definition, + Index, + IndexContext, + IndexTransform, + Location, + Reference, + ReferenceError, +) +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def temp_dir() -> Iterator[Path]: + with tempfile.TemporaryDirectory() as td: + yield Path(td) + + +@pytest.fixture +def basic_settings(temp_dir: Path) -> Settings: + return Settings(temp_dir, {"tool": {"docc": {}}}) + + +@pytest.fixture +def plugin_settings(basic_settings: Settings) -> PluginSettings: + return basic_settings.for_plugin("docc.references") + + +_UNSET = object() + + +class MockSource(Source): + _output_path: PurePath + + def __init__( + self, + relative_path: object = _UNSET, + output_path: Optional[PurePath] = None, + ) -> None: + if relative_path is _UNSET: + self._relative_path: Optional[PurePath] = PurePath("test.py") + elif relative_path is None or isinstance(relative_path, PurePath): + self._relative_path = relative_path + else: + raise TypeError(f"unexpected type: {type(relative_path)}") + self._output_path = ( + output_path or self._relative_path or PurePath("test.py") + ) + + @property + def relative_path(self) -> Optional[PurePath]: + return self._relative_path + + @property + def output_path(self) -> PurePath: + return self._output_path + + +class TestLocation: + def test_create_location(self) -> None: + source = MockSource() + location = Location(source=source, identifier="test.func", specifier=0) + + assert location.source is source + assert location.identifier == "test.func" + assert location.specifier == 0 + + def test_location_is_frozen(self) -> None: + source = MockSource() + 
location = Location(source=source, identifier="test", specifier=0) + + with pytest.raises(AttributeError): + location.identifier = "changed" # pyre-ignore[41] + + def test_location_equality(self) -> None: + source = MockSource() + first_location = Location( + source=source, identifier="test", specifier=0 + ) + second_location = Location( + source=source, identifier="test", specifier=0 + ) + third_location = Location( + source=source, identifier="test", specifier=1 + ) + + assert first_location == second_location + assert first_location != third_location + + def test_location_is_hashable(self) -> None: + source = MockSource() + location = Location(source=source, identifier="test", specifier=0) + location_set = {location} + assert location in location_set + + +class TestIndex: + def test_create_index(self) -> None: + index = Index() + assert isinstance(index._index, dict) + assert len(index._index) == 0 + + def test_define_creates_location(self) -> None: + index = Index() + source = MockSource() + + location = index.define(source, "test.module.func") + + assert location.source is source + assert location.identifier == "test.module.func" + assert location.specifier == 0 + + def test_define_increments_specifier(self) -> None: + index = Index() + source = MockSource() + + first_location = index.define(source, "test.func") + second_location = index.define(source, "test.func") + third_location = index.define(source, "test.func") + + assert first_location.specifier == 0 + assert second_location.specifier == 1 + assert third_location.specifier == 2 + + def test_define_different_identifiers(self) -> None: + index = Index() + source = MockSource() + + first_location = index.define(source, "func_a") + second_location = index.define(source, "func_b") + third_location = index.define(source, "func_a") + + assert first_location.specifier == 0 + assert second_location.specifier == 0 + assert third_location.specifier == 1 + + def test_lookup_existing(self) -> None: + index = Index() 
+ source = MockSource() + expected = index.define(source, "test.func") + + result = list(index.lookup("test.func")) + + assert len(result) == 1 + assert result[0] == expected + + def test_lookup_multiple(self) -> None: + index = Index() + source = MockSource() + first_location = index.define(source, "test.func") + second_location = index.define(source, "test.func") + + result = list(index.lookup("test.func")) + + assert len(result) == 2 + assert first_location in result + assert second_location in result + + def test_lookup_nonexistent_raises(self) -> None: + index = Index() + + with pytest.raises(ReferenceError): + index.lookup("nonexistent") + + +class TestReferenceError: + def test_basic_error(self) -> None: + error = ReferenceError("undefined_func") + assert "undefined_func" in str(error) + assert error.identifier == "undefined_func" + assert error.context is None + + def test_error_with_context_source(self) -> None: + source = MockSource(relative_path=PurePath("src/module.py")) + context = Context({Source: source}) + error = ReferenceError("missing_ref", context=context) + + assert "missing_ref" in str(error) + assert "src/module.py" in str(error) + assert error.context is context + + def test_error_with_context_no_relative_path(self) -> None: + source = MockSource( + relative_path=None, output_path=PurePath("output.html") + ) + context = Context({Source: source}) + error = ReferenceError("missing_ref", context=context) + + assert "missing_ref" in str(error) + assert "output.html" in str(error) + + +class TestBase: + def test_children_returns_tuple(self) -> None: + child = BlankNode() + base = Definition(identifier="test", child=child) + + assert base.children == (child,) + + def test_default_child_is_blank(self) -> None: + base = Definition(identifier="test") + + assert isinstance(base.child, BlankNode) + + def test_replace_child(self) -> None: + old_child = BlankNode() + new_child = BlankNode() + base = Definition(identifier="test", child=old_child) + + 
base.replace_child(old_child, new_child) + + assert base.child is new_child + + def test_replace_child_no_match(self) -> None: + child = BlankNode() + other = BlankNode() + new_child = BlankNode() + base = Definition(identifier="test", child=child) + + base.replace_child(other, new_child) + + assert base.child is child + + +class TestDefinition: + def test_create_definition(self) -> None: + child = BlankNode() + definition = Definition(identifier="test.func", child=child) + + assert definition.identifier == "test.func" + assert definition.child is child + assert definition.specifier is None + + def test_specifier_can_be_set(self) -> None: + definition = Definition(identifier="test", specifier=5) + assert definition.specifier == 5 + + +class TestReference: + def test_create_reference(self) -> None: + child = BlankNode() + reference = Reference(identifier="test.func", child=child) + + assert reference.identifier == "test.func" + assert reference.child is child + + +class TestIndexContext: + def test_provides_index(self) -> None: + assert IndexContext.provides() == Index + + def test_init_creates_index(self, plugin_settings: PluginSettings) -> None: + ctx = IndexContext(plugin_settings) + assert isinstance(ctx.index, Index) + + def test_provide_returns_index( + self, plugin_settings: PluginSettings + ) -> None: + ctx = IndexContext(plugin_settings) + provided = ctx.provide() + + assert provided is ctx.index + + +class TestIndexTransform: + def test_transform_indexes_definitions( + self, plugin_settings: PluginSettings + ) -> None: + source = MockSource() + index = Index() + + definition = Definition(identifier="test.func") + root = ListNode([definition]) + document = Document(root) + + context = Context({Document: document, Source: source, Index: index}) + + transform = IndexTransform(plugin_settings) + transform.transform(context) + + assert definition.specifier == 0 + + locations = list(index.lookup("test.func")) + assert len(locations) == 1 + assert 
locations[0].identifier == "test.func" + + def test_transform_nested_definitions( + self, plugin_settings: PluginSettings + ) -> None: + source = MockSource() + index = Index() + + inner_def = Definition(identifier="inner") + outer_def = Definition(identifier="outer", child=inner_def) + root = ListNode([outer_def]) + document = Document(root) + + context = Context({Document: document, Source: source, Index: index}) + + transform = IndexTransform(plugin_settings) + transform.transform(context) + + assert outer_def.specifier == 0 + assert inner_def.specifier == 0 + + outer_locations = list(index.lookup("outer")) + inner_locations = list(index.lookup("inner")) + assert len(outer_locations) == 1 + assert len(inner_locations) == 1 + + def test_transform_multiple_definitions_same_id( + self, plugin_settings: PluginSettings + ) -> None: + source = MockSource() + index = Index() + + first_definition = Definition(identifier="same_id") + second_definition = Definition(identifier="same_id") + root = ListNode([first_definition, second_definition]) + document = Document(root) + + context = Context({Document: document, Source: source, Index: index}) + + transform = IndexTransform(plugin_settings) + transform.transform(context) + + assert first_definition.specifier == 0 + assert second_definition.specifier == 1 + + def test_transform_ignores_references( + self, plugin_settings: PluginSettings + ) -> None: + source = MockSource() + index = Index() + + reference = Reference(identifier="some_ref") + root = ListNode([reference]) + document = Document(root) + + context = Context({Document: document, Source: source, Index: index}) + + transform = IndexTransform(plugin_settings) + transform.transform(context) + + with pytest.raises(ReferenceError): + index.lookup("some_ref") + + +class TestDefinitionReferenceInteraction: + def test_definition_child_is_reference(self) -> None: + ref = Reference(identifier="other") + definition = Definition(identifier="test", child=ref) + + assert 
definition.child is ref + assert definition.children == (ref,) + + def test_reference_child_is_definition(self) -> None: + definition = Definition(identifier="inner") + reference = Reference(identifier="test", child=definition) + + assert reference.child is definition + assert reference.children == (definition,) diff --git a/tests/test_resources.py b/tests/test_resources.py new file mode 100644 index 0000000..b17c2e5 --- /dev/null +++ b/tests/test_resources.py @@ -0,0 +1,155 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ +from io import StringIO +from pathlib import Path, PurePath +from typing import Dict, Set + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document +from docc.plugins.resources import ( + ResourceBuilder, + ResourceNode, + ResourceSource, +) +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def plugin_settings() -> PluginSettings: + settings = Settings(Path("."), {"tool": {"docc": {}}}) + return settings.for_plugin("docc.resources.build") + + +class TestResourceSource: + def test_with_path_creates_source(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + assert source is not None + assert source.output_path == PurePath("static") / "docc" + assert source.extension == ".css" + + def test_relative_path_is_none(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + assert source.relative_path is None + + def test_output_path(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("output") / "style", + ) + assert source.output_path == PurePath("output") / "style" + + +class TestResourceNode: + def test_children_empty(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + node = ResourceNode(source.resource, source.extension) + assert node.children == () + + def test_extension(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + node = ResourceNode(source.resource, source.extension) + assert node.extension == ".css" + + def test_replace_child_raises(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + 
PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + node = ResourceNode(source.resource, source.extension) + + with pytest.raises(TypeError): + node.replace_child(BlankNode(), BlankNode()) + + def test_output(self) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + node = ResourceNode(source.resource, source.extension) + context = Context({}) + destination = StringIO() + + node.output(context, destination) + + result = destination.getvalue() + assert len(result) > 0, "Output should not be empty" + assert "{" in result, "CSS output should contain style blocks" + + +class TestResourceBuilder: + def test_build_processes_resource_sources( + self, plugin_settings: PluginSettings + ) -> None: + source = ResourceSource.with_path( + "docc.plugins.html", + PurePath("static") / "docc.css", + PurePath("static") / "docc", + ) + + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = ResourceBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert len(unprocessed) == 0 + assert len(processed) == 1 + assert source in processed + assert isinstance(processed[source].root, ResourceNode) + + def test_build_ignores_non_resource_sources( + self, plugin_settings: PluginSettings + ) -> None: + class OtherSource(Source): + @property + def relative_path(self): + return PurePath("other.py") + + @property + def output_path(self): + return PurePath("other.py") + + source = OtherSource() + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = ResourceBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert source in unprocessed + assert len(processed) == 0 diff --git a/tests/test_search.py b/tests/test_search.py new file mode 100644 index 0000000..dd6462c --- /dev/null +++ b/tests/test_search.py @@ -0,0 +1,455 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is 
free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import json +from io import StringIO +from pathlib import Path, PurePath +from typing import Dict, Optional, Set + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document +from docc.plugins.references import Definition, Index, ReferenceError +from docc.plugins.search import ( + ByReference, + BySource, + Item, + Search, + Searchable, + SearchBuilder, + SearchContext, + SearchDiscover, + SearchNode, + SearchSource, + SearchTransform, + _SearchVisitor, +) +from docc.settings import PluginSettings, Settings +from docc.source import Source + + +@pytest.fixture +def plugin_settings() -> PluginSettings: + settings = Settings(Path("."), {"tool": {"docc": {}}}) + return settings.for_plugin("docc.search") + + +class MockSource(Source): + _path: PurePath + + def __init__(self, path: Optional[PurePath] = None) -> None: + self._path = path if path is not None else PurePath("mock.py") + + @property + def relative_path(self) -> Optional[PurePath]: + return self._path + + @property + def output_path(self) -> PurePath: + return self._path + + +class TestBySource: + def test_create(self) -> None: + source = MockSource() + location = BySource(source=source) + assert location.source is source + + def test_frozen(self) -> None: + source = MockSource() + location = BySource(source=source) + with pytest.raises(AttributeError): + location.source = MockSource() 
# pyre-ignore[41] + + def test_equality(self) -> None: + source = MockSource() + first_location = BySource(source=source) + second_location = BySource(source=source) + assert first_location == second_location + + +class TestByReference: + def test_create(self) -> None: + location = ByReference(identifier="test.func", specifier=0) + assert location.identifier == "test.func" + assert location.specifier == 0 + + def test_create_without_specifier(self) -> None: + location = ByReference(identifier="test.func", specifier=None) + assert location.specifier is None + + def test_frozen(self) -> None: + location = ByReference(identifier="test", specifier=0) + with pytest.raises(AttributeError): + location.identifier = "changed" # pyre-ignore[41] + + +class TestItem: + def test_create_with_string_content(self) -> None: + source = MockSource() + location = BySource(source=source) + item = Item(location=location, content="test content") + + assert item.location is location + assert item.content == "test content" + + def test_create_with_dict_content(self) -> None: + source = MockSource() + location = BySource(source=source) + item = Item( + location=location, content={"type": "module", "name": ["test"]} + ) + + assert isinstance(item.content, dict) + assert item.content["type"] == "module" + + +class TestSearch: + def test_init(self) -> None: + search = Search() + assert len(search._items) == 0 + + def test_add_string_content(self) -> None: + search = Search() + source = MockSource() + location = BySource(source=source) + item = Item(location=location, content="test content") + + search.add(item) + + # Note: Accessing _items directly as Search has no public query API + assert location in search._items + assert "text" in search._items[location] + assert "test content" in search._items[location]["text"] + + def test_add_dict_content(self) -> None: + search = Search() + source = MockSource() + location = BySource(source=source) + item = Item( + location=location, content={"type": 
"module", "name": ["test"]} + ) + + search.add(item) + + assert location in search._items + assert "type" in search._items[location] + assert "name" in search._items[location] + + def test_add_multiple_items_same_location(self) -> None: + search = Search() + source = MockSource() + location = BySource(source=source) + + search.add(Item(location=location, content="first")) + search.add(Item(location=location, content="second")) + + assert "first" in search._items[location]["text"] + assert "second" in search._items[location]["text"] + + +class TestSearchSource: + def test_relative_path_is_none(self) -> None: + source = SearchSource() + assert source.relative_path is None + + def test_output_path(self) -> None: + source = SearchSource() + assert source.output_path == PurePath("search") + + +class TestSearchNode: + def test_extension(self) -> None: + node = SearchNode() + assert node.extension == ".js" + + +class TestSearchBuilder: + def test_build_processes_search_sources( + self, plugin_settings: PluginSettings + ) -> None: + source = SearchSource() + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = SearchBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert len(unprocessed) == 0 + assert source in processed + assert isinstance(processed[source].root, SearchNode) + + def test_build_ignores_non_search_sources( + self, plugin_settings: PluginSettings + ) -> None: + source = MockSource() + unprocessed: Set[Source] = {source} + processed: Dict[Source, Document] = {} + + builder = SearchBuilder(plugin_settings) + builder.build(unprocessed, processed) + + assert source in unprocessed + assert len(processed) == 0 + + +class TestSearchDiscover: + def test_discover_yields_search_source( + self, plugin_settings: PluginSettings + ) -> None: + discover = SearchDiscover(plugin_settings) + sources = list(discover.discover(frozenset())) + + assert len(sources) == 1 + assert isinstance(sources[0], SearchSource) + + 
+class TestSearchContext:
+    """Checks that ``SearchContext`` creates and provides a ``Search``."""
+
+    def test_provides(self) -> None:
+        assert SearchContext.provides() == Search
+
+    def test_init(self, plugin_settings: PluginSettings) -> None:
+        ctx = SearchContext(plugin_settings)
+        assert isinstance(ctx.search, Search)
+
+    def test_provide(self, plugin_settings: PluginSettings) -> None:
+        ctx = SearchContext(plugin_settings)
+        provided = ctx.provide()
+        assert provided is ctx.search
+
+
+# JavaScript wrapper the tests expect SearchNode.output() to put around the
+# JSON payload.
+_INDEX_PREFIX = "this.SEARCH_INDEX = "
+_INDEX_SUFFIX = "; Object.freeze(this.SEARCH_INDEX);"
+
+
+def _index_json(search: Search, index: Index) -> str:
+    """
+    Render a ``SearchNode`` for ``search``/``index`` and return the JSON
+    text found between the expected JavaScript prefix and suffix.
+
+    Fails the calling test if the rendered output is not wrapped as expected.
+    """
+    dest = StringIO()
+    SearchNode().output(Context({Search: search, Index: index}), dest)
+
+    output = dest.getvalue()
+    # Check the envelope before slicing, so a changed wrapper is reported as
+    # such rather than as a JSON decoding error in the caller.
+    assert output.startswith(_INDEX_PREFIX)
+    assert output.endswith(_INDEX_SUFFIX)
+    return output[len(_INDEX_PREFIX) : -len(_INDEX_SUFFIX)]
+
+
+class TestSearchNodeOutput:
+    """Checks the serialized form written by ``SearchNode.output``."""
+
+    def test_output_by_source(self) -> None:
+        """
+        Test SearchNode.output() serializes search index to
+        JavaScript JSON with items indexed by BySource.
+        """
+        source = MockSource(PurePath("module.py"))
+        search = Search()
+        search.add(
+            Item(location=BySource(source=source), content="hello world")
+        )
+
+        data = json.loads(_index_json(search, Index()))
+        assert len(data) == 1
+        assert data[0]["source"]["path"] == "module.py"
+        assert "hello world" in data[0]["content"]["text"]
+
+    def test_output_by_reference_without_specifier(self) -> None:
+        """Test SearchNode.output() resolves ByReference location via Index."""
+        source = MockSource(PurePath("ref_module.py"))
+        search = Search()
+        location = ByReference(identifier="my.module.func", specifier=None)
+        search.add(Item(location=location, content="func docs"))
+
+        index = Index()
+        index.define(source, "my.module.func")
+
+        data = json.loads(_index_json(search, index))
+        assert len(data) == 1
+        assert data[0]["source"]["identifier"] == "my.module.func"
+        assert data[0]["source"]["path"] == "ref_module.py"
+
+    def test_output_by_reference_with_specifier(self) -> None:
+        """Test SearchNode.output() resolves ByReference with specifier."""
+        source = MockSource(PurePath("spec_module.py"))
+        search = Search()
+        location = ByReference(identifier="my.ident", specifier=0)
+        search.add(Item(location=location, content="spec docs"))
+
+        index = Index()
+        index.define(source, "my.ident")  # specifier=0
+
+        data = json.loads(_index_json(search, index))
+        assert len(data) == 1
+        assert data[0]["source"]["specifier"] == 0
+        assert data[0]["source"]["path"] == "spec_module.py"
+
+    def test_output_by_reference_specifier_not_found_raises(self) -> None:
+        """
+        Test ByReference with specifier not found raises
+        ReferenceError.
+        """
+        source = MockSource(PurePath("err_module.py"))
+        search = Search()
+        # Use specifier=99 which won't match any definition
+        location = ByReference(identifier="my.missing", specifier=99)
+        search.add(Item(location=location, content="should fail"))
+
+        index = Index()
+        index.define(source, "my.missing")  # specifier=0
+
+        # Everything is built outside the ``raises`` block so that only
+        # output() itself can satisfy the expectation.
+        context = Context({Search: search, Index: index})
+        node = SearchNode()
+        dest = StringIO()
+        with pytest.raises(ReferenceError):
+            node.output(context, dest)
+
+    def test_output_mixed_sources_and_references(self) -> None:
+        """
+        Test SearchNode.output() with both BySource and
+        ByReference items.
+        """
+        source = MockSource(PurePath("mixed.py"))
+        search = Search()
+        search.add(
+            Item(location=BySource(source=source), content="source item")
+        )
+        ref_location = ByReference(identifier="mixed.func", specifier=None)
+        search.add(Item(location=ref_location, content="ref item"))
+
+        index = Index()
+        index.define(source, "mixed.func")
+
+        data = json.loads(_index_json(search, index))
+        assert len(data) == 2
+        # Both items live in the same file, and exactly one of them is the
+        # reference entry; counting entries alone would not notice a mix-up.
+        paths = [entry["source"]["path"] for entry in data]
+        assert paths == ["mixed.py", "mixed.py"]
+        identifiers = [entry["source"].get("identifier") for entry in data]
+        assert identifiers.count("mixed.func") == 1
+
+
+class TestSearchTransform:
+    def test_transform_indexes_searchable_under_definition(
+        self, plugin_settings: PluginSettings
+    ) -> None:
+        """
+        Test that transform() indexes Searchable nodes wrapped in
+        Definitions as ByReference with the correct identifier.
+        """
+        source = MockSource(PurePath("test_transform.py"))
+        search = Search()
+
+        searchable = MockSearchable("indexed content")
+        definition = Definition(identifier="my.module.MyClass")
+        definition.specifier = 0
+        definition.child = searchable
+
+        document = Document(definition)
+        context = Context({Source: source, Search: search, Document: document})
+
+        transform = SearchTransform(plugin_settings)
+        transform.transform(context)
+
+        # The searchable should be indexed as ByReference
+        by_ref = ByReference(identifier="my.module.MyClass", specifier=0)
+        assert by_ref in search._items
+        assert "indexed content" in search._items[by_ref]["text"]
+
+
+class MockSearchable(BlankNode, Searchable):
+    """
+    Searchable node stub returning a fixed string from ``to_search()`` and
+    a fixed flag from ``search_children()``.
+    """
+
+    def __init__(
+        self, content: str = "test content", search_children_val: bool = True
+    ) -> None:
+        self._content = content
+        self._search_children = search_children_val
+
+    def to_search(self) -> str:
+        return self._content
+
+    def search_children(self) -> bool:
+        return self._search_children
+
+
+class TestSearchVisitor:
+    """Checks what ``_SearchVisitor`` adds to the ``Search`` index."""
+
+    def test_adds_searchable_content(
+        self, plugin_settings: PluginSettings
+    ) -> None:
+        source = MockSource()
+        search = Search()
+        document = Document(MockSearchable("searchable content"))
+        context = Context({Source: source, Search: search, Document: document})
+
+        visitor = _SearchVisitor(context)
+        document.root.visit(visitor)
+
+        by_source = BySource(source=source)
+        assert by_source in search._items
+        assert "searchable content" in search._items[by_source]["text"]
+
+    def test_respects_search_children(
+        self, plugin_settings: PluginSettings
+    ) -> None:
+        source = MockSource()
+        search = Search()
+        inner = MockSearchable("inner")
+
+        class ParentNode(MockSearchable):
+            # Parent that opts out of child indexing but still exposes a
+            # searchable child through ``children``.
+            def __init__(self) -> None:
+                super().__init__("parent", search_children_val=False)
+                self._children = [inner]
+
+            @property
+            def children(self):
+                return self._children
+
+        document = Document(ParentNode())
+        context = Context({Source: source, Search: search, Document: document})
+
+        visitor = _SearchVisitor(context)
+        document.root.visit(visitor)
+
+        # Note: Accessing _items is necessary as Search has no public query API
+        by_source = BySource(source=source)
+        assert "parent" in search._items[by_source]["text"]
+        # Verify children were NOT indexed when search_children=False
+        assert (
+            "inner" not in search._items[by_source]["text"]
+        ), "Children should be skipped when search_children() returns False"
diff --git a/tests/test_settings.py b/tests/test_settings.py
new file mode 100644
index 0000000..3c6e903
--- /dev/null
+++ b/tests/test_settings.py
@@ -0,0 +1,307 @@
+# Copyright (C) 2025 Ethereum Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import tempfile
+from pathlib import Path, PurePath
+from typing import Iterator
+
+import pytest
+
+from docc.settings import (
+    FILE_NAME,
+    MAX_DEPTH,
+    Output,
+    PluginSettings,
+    Settings,
+    SettingsError,
+)
+
+
+@pytest.fixture
+def temp_dir() -> Iterator[Path]:
+    """Yield a fresh temporary directory that is removed after the test."""
+    with tempfile.TemporaryDirectory() as td:
+        yield Path(td)
+
+
+class TestOutput:
+    """Checks that ``Output`` stores the path it is given."""
+
+    def test_create_with_path(self) -> None:
+        output = Output(path=Path("/output/docs"))
+        assert output.path == Path("/output/docs")
+
+    def test_create_with_none(self) -> None:
+        output = Output(path=None)
+        assert output.path is None
+
+
+class TestPluginSettings:
+    """Checks the mapping and path helpers of ``PluginSettings``."""
+
+    def test_init(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {"key": "value"})
+
+        assert plugin_settings["key"] == "value"
+
+    def test_len(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {"a": 1, "b": 2, "c": 3})
+
+        assert len(plugin_settings) == 3
+
+    def test_iter(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {"a": 1, "b": 2})
+
+        keys = list(plugin_settings)
+        assert "a" in keys
+        assert "b" in keys
+
+    def test_getitem(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {"test_key": "test_value"})
+
+        assert plugin_settings["test_key"] == "test_value"
+
+    def test_getitem_missing_raises(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {})
+
+        with pytest.raises(KeyError):
+            plugin_settings["missing"]
+
+    def test_get_with_default(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {})
+
+        assert plugin_settings.get("missing", "default") == "default"
+
+    def test_resolve_path(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {})
+
+        resolved = plugin_settings.resolve_path(PurePath("subdir"))
+        assert resolved.is_absolute()
+        assert str(temp_dir) in str(resolved)
+
+    def test_unresolve_path(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = PluginSettings(settings, {})
+
+        absolute = temp_dir / "subdir" / "file.py"
+        relative = plugin_settings.unresolve_path(absolute)
+
+        assert not relative.is_absolute()
+
+
+class TestSettings:
+    """Checks defaults, overrides and path handling of ``Settings``."""
+
+    def test_init_with_empty_tool_docc(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        assert isinstance(settings.context, list)
+        assert settings.output.path is None
+
+    def test_init_without_tool_key(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {})
+        assert isinstance(settings.context, list)
+        assert settings.output.path is None
+
+    def test_init_with_invalid_tool_type(self, temp_dir: Path) -> None:
+        with pytest.raises(TypeError, match="must be a dict"):
+            Settings(temp_dir, {"tool": "not_a_dict"})
+
+    def test_output_path_from_settings(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"output": {"path": "docs"}}}},
+        )
+        assert settings.output.path == Path("docs")
+
+    def test_output_path_none_when_not_specified(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        assert settings.output.path is None
+
+    def test_for_plugin(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {
+                "tool": {
+                    "docc": {"plugins": {"test.plugin": {"option": "value"}}}
+                }
+            },
+        )
+        plugin_settings = settings.for_plugin("test.plugin")
+
+        assert plugin_settings["option"] == "value"
+
+    def test_for_plugin_missing_returns_empty(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = settings.for_plugin("nonexistent.plugin")
+
+        assert len(plugin_settings) == 0
+
+    def test_context_default(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        context = settings.context
+
+        assert isinstance(context, list)
+        assert "docc.references.context" in context
+        assert "docc.search.context" in context
+        assert "docc.html.context" in context
+
+    def test_context_custom(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"context": ["custom.context"]}}},
+        )
+        context = settings.context
+
+        assert context == ["custom.context"]
+
+    def test_discovery_default(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        discovery = settings.discovery
+
+        assert isinstance(discovery, list)
+        assert "docc.python.discover" in discovery
+        assert "docc.html.discover" in discovery
+
+    def test_discovery_custom(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"discovery": ["custom.discover"]}}},
+        )
+        discovery = settings.discovery
+
+        assert discovery == ["custom.discover"]
+
+    def test_build_default(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        build = settings.build
+
+        assert isinstance(build, list)
+        assert "docc.python.build" in build
+
+    def test_build_custom(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"build": ["custom.build"]}}},
+        )
+        build = settings.build
+
+        assert build == ["custom.build"]
+
+    def test_transform_default(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        transform = settings.transform
+
+        assert isinstance(transform, list)
+        assert "docc.python.transform" in transform
+        assert "docc.html.transform" in transform
+
+    def test_transform_custom(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"transform": ["custom.transform"]}}},
+        )
+        transform = settings.transform
+
+        assert transform == ["custom.transform"]
+
+    def test_resolve_path(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        resolved = settings.resolve_path(PurePath("subdir"))
+
+        assert resolved.is_absolute()
+        assert str(temp_dir) in str(resolved)
+
+    def test_resolve_path_escapes_root_raises(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+
+        with pytest.raises(ValueError):
+            settings.resolve_path(PurePath("../escape"))
+
+    def test_unresolve_path(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        absolute = temp_dir / "subdir" / "file.py"
+        relative = settings.unresolve_path(absolute)
+
+        assert not relative.is_absolute()
+        assert relative == PurePath("subdir") / "file.py"
+
+
+class TestSettingsFromFile:
+    """Checks how ``Settings.from_file`` locates and reads the config file."""
+
+    def test_from_file_finds_pyproject_toml(self, temp_dir: Path) -> None:
+        pyproject = temp_dir / "pyproject.toml"
+        pyproject.write_text('[tool.docc]\noutput = { path = "docs" }\n')
+
+        settings = Settings.from_file(temp_dir)
+
+        assert settings.output.path == Path("docs")
+
+    def test_from_file_searches_parent_directories(
+        self, temp_dir: Path
+    ) -> None:
+        pyproject = temp_dir / "pyproject.toml"
+        pyproject.write_text('[tool.docc]\noutput = { path = "docs" }\n')
+
+        subdir = temp_dir / "src" / "submodule"
+        subdir.mkdir(parents=True)
+
+        settings = Settings.from_file(subdir)
+
+        assert settings.output.path == Path("docs")
+
+    def test_from_file_respects_max_depth(self, temp_dir: Path) -> None:
+        # Nest deeper than MAX_DEPTH below temp_dir and expect the lookup
+        # to fail; no config file is created anywhere in the chain.
+        deep_path = temp_dir
+        for i in range(MAX_DEPTH + 5):
+            deep_path = deep_path / f"level{i}"
+        deep_path.mkdir(parents=True)
+
+        with pytest.raises(SettingsError, match="could not find"):
+            Settings.from_file(deep_path)
+
+    def test_from_file_not_found_raises(self, temp_dir: Path) -> None:
+        with pytest.raises(SettingsError, match="could not find"):
+            Settings.from_file(temp_dir)
+
+    def test_from_file_with_complete_config(self, temp_dir: Path) -> None:
+        config = """
+[tool.docc]
+context = ["custom.context"]
+discovery = ["custom.discover"]
+build = ["custom.build"]
+transform = ["custom.transform"]
+
+[tool.docc.output]
+path = "output"
+
+[tool.docc.plugins."custom.plugin"]
+option = "value"
+"""
+        pyproject = temp_dir / "pyproject.toml"
+        pyproject.write_text(config)
+
+        settings = Settings.from_file(temp_dir)
+
+        assert settings.context == ["custom.context"]
+        assert settings.discovery == ["custom.discover"]
+        assert settings.build == ["custom.build"]
+        assert settings.transform == ["custom.transform"]
+        assert settings.output.path == Path("output")
+        # The plugin table is part of the config written above, so check
+        # that it is loaded as well.
+        assert settings.for_plugin("custom.plugin")["option"] == "value"
+
+
+class TestSettingsConstants:
+    """Pins the module-level constants used by the config file lookup."""
+
+    def test_max_depth(self) -> None:
+        assert MAX_DEPTH == 10
+
+    def test_file_name(self) -> None:
+        assert FILE_NAME == "pyproject.toml"
diff --git a/tests/test_source.py b/tests/test_source.py
new file mode 100644
index 0000000..e61ce25
--- /dev/null
+++ b/tests/test_source.py
@@ -0,0 +1,184 @@
+# Copyright (C) 2025 Ethereum Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.
If not, see .
+
+import tempfile
+from io import StringIO
+from pathlib import Path, PurePath
+from typing import Optional, TextIO
+
+import pytest
+
+from docc.source import Source, TextSource
+
+
+class ConcreteSource(Source):
+    """``Source`` stub with independently configurable paths."""
+
+    _relative: Optional[PurePath]
+    _output: PurePath
+
+    def __init__(
+        self,
+        relative: Optional[PurePath] = None,
+        output: Optional[PurePath] = None,
+    ) -> None:
+        self._relative = relative
+        self._output = output or PurePath("output.html")
+
+    @property
+    def relative_path(self) -> Optional[PurePath]:
+        return self._relative
+
+    @property
+    def output_path(self) -> PurePath:
+        return self._output
+
+
+class ConcreteTextSource(TextSource):
+    """``TextSource`` stub that serves its text from an in-memory string."""
+
+    _content: str
+    _relative: PurePath
+    _output: PurePath
+
+    def __init__(
+        self,
+        content: str,
+        relative: Optional[PurePath] = None,
+        output: Optional[PurePath] = None,
+    ) -> None:
+        self._content = content
+        self._relative = relative or PurePath("test.py")
+        # The output path defaults to the relative path.
+        self._output = output or self._relative
+
+    @property
+    def relative_path(self) -> Optional[PurePath]:
+        return self._relative
+
+    @property
+    def output_path(self) -> PurePath:
+        return self._output
+
+    def open(self) -> TextIO:
+        return StringIO(self._content)
+
+
+class TestSource:
+    """Checks ``repr()`` and ``output_path`` on a concrete ``Source``."""
+
+    def test_repr_with_relative_path(self) -> None:
+        source = ConcreteSource(relative=PurePath("src/module.py"))
+        result = repr(source)
+
+        assert "src/module.py" in result
+        assert "ConcreteSource" in result
+
+    def test_repr_without_relative_path(self) -> None:
+        source = ConcreteSource(relative=None)
+        result = repr(source)
+
+        assert "ConcreteSource" in result
+
+    def test_output_path(self) -> None:
+        source = ConcreteSource(output=PurePath("docs/output.html"))
+        assert source.output_path == PurePath("docs/output.html")
+
+
+class TestTextSource:
+    """Checks ``open()`` and one-based ``line()`` lookups."""
+
+    def test_open_returns_text_io(self) -> None:
+        source = ConcreteTextSource("content")
+        with source.open() as f:
+            assert f.read() == "content"
+
+    def test_line_returns_correct_line(self) -> None:
+        content = "line1\nline2\nline3"
+        source = ConcreteTextSource(content)
+
+        assert source.line(1) == "line1"
+        assert source.line(2) == "line2"
+        assert source.line(3) == "line3"
+
+    def test_line_out_of_range_raises(self) -> None:
+        content = "line1\nline2"
+        source = ConcreteTextSource(content)
+
+        with pytest.raises(IndexError, match="line 10 out of range"):
+            source.line(10)
+
+    def test_line_single_line(self) -> None:
+        content = "single line"
+        source = ConcreteTextSource(content)
+
+        assert source.line(1) == "single line"
+
+    def test_line_empty_content(self) -> None:
+        content = ""
+        source = ConcreteTextSource(content)
+
+        assert source.line(1) == ""
+
+    def test_line_with_empty_lines(self) -> None:
+        content = "first\n\nthird"
+        source = ConcreteTextSource(content)
+
+        assert source.line(1) == "first"
+        assert source.line(2) == ""
+        assert source.line(3) == "third"
+
+
+class TestTextSourceBoundary:
+    """Pins the current results of ``line()`` for non-positive numbers."""
+
+    def test_line_zero_returns_last_line(self) -> None:
+        """
+        line(0) computes lines[0 - 1] = lines[-1], which silently
+        returns the last line due to Python negative indexing.
+        """
+        content = "first\nsecond\nthird"
+        source = ConcreteTextSource(content)
+        # line(0) accesses lines[-1] which is "third"
+        assert source.line(0) == "third"
+
+    def test_line_negative_one_returns_second_to_last(self) -> None:
+        """
+        line(-1) computes lines[-1 - 1] = lines[-2], which silently
+        returns the second-to-last line due to Python negative indexing.
+        """
+        content = "first\nsecond\nthird"
+        source = ConcreteTextSource(content)
+        # line(-1) accesses lines[-2] which is "second"
+        assert source.line(-1) == "second"
+
+
+class TestTextSourceWithFiles:
+    """Checks ``line()`` against a ``TextSource`` backed by a real file."""
+
+    def test_line_from_real_file(self) -> None:
+        # delete=False keeps the file after the ``with`` block so that
+        # FileTextSource can reopen it; the ``finally`` below removes it.
+        # The encoding is fixed so that writing and reading do not depend
+        # on the platform default.
+        with tempfile.NamedTemporaryFile(
+            mode="w", suffix=".py", delete=False, encoding="utf-8"
+        ) as f:
+            f.write("# Line 1\n# Line 2\n# Line 3\n")
+            f.flush()
+            path = Path(f.name)
+
+        class FileTextSource(TextSource):
+            def __init__(self, file_path: Path) -> None:
+                self._path = file_path
+
+            @property
+            def relative_path(self) -> Optional[PurePath]:
+                return PurePath(self._path.name)
+
+            @property
+            def output_path(self) -> PurePath:
+                return PurePath(self._path.name)
+
+            def open(self) -> TextIO:
+                return open(self._path, "r", encoding="utf-8")
+
+        try:
+            source = FileTextSource(path)
+            assert source.line(1) == "# Line 1"
+            assert source.line(2) == "# Line 2"
+        finally:
+            path.unlink()
diff --git a/tests/test_transform.py b/tests/test_transform.py
new file mode 100644
index 0000000..12d0c2d
--- /dev/null
+++ b/tests/test_transform.py
@@ -0,0 +1,101 @@
+# Copyright (C) 2025 Ethereum Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+
+import tempfile
+from pathlib import Path
+from typing import Iterator
+
+import pytest
+
+from docc.context import Context
+from docc.settings import PluginSettings, Settings
+from docc.transform import Transform, load
+
+
+@pytest.fixture
+def temp_dir() -> Iterator[Path]:
+    """Yield a fresh temporary directory that is removed after the test."""
+    with tempfile.TemporaryDirectory() as td:
+        yield Path(td)
+
+
+class ConcreteTransform(Transform):
+    """No-op ``Transform`` that only records the settings it was given."""
+
+    def __init__(self, config: PluginSettings) -> None:
+        self.config = config
+
+    def transform(self, context: Context) -> None:
+        pass
+
+
+class TestTransform:
+    """Checks that a ``Transform`` subclass can be built from settings."""
+
+    def test_concrete_transform_init(self, temp_dir: Path) -> None:
+        settings = Settings(temp_dir, {"tool": {"docc": {}}})
+        plugin_settings = settings.for_plugin("test")
+
+        transform = ConcreteTransform(plugin_settings)
+        assert transform.config is plugin_settings
+
+
+class TestTransformLoad:
+    """Checks the ``(name, transform)`` pairs returned by ``load()``."""
+
+    def test_load_empty_transform_list(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"transform": []}}},
+        )
+
+        result = load(settings)
+        assert result == []
+
+    def test_load_single_transform(self, temp_dir: Path) -> None:
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"transform": ["docc.python.transform"]}}},
+        )
+
+        result = load(settings)
+        assert len(result) == 1
+        assert result[0][0] == "docc.python.transform"
+
+    def test_load_multiple_transforms(self, temp_dir: Path) -> None:
+        transforms = [
+            "docc.python.transform",
+            "docc.mistletoe.transform",
+        ]
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"transform": transforms}}},
+        )
+
+        result = load(settings)
+        assert len(result) == 2
+        # Check which transforms were loaded, not only how many.
+        assert [name for name, _ in result] == transforms
+
+    def test_load_preserves_order(self, temp_dir: Path) -> None:
+        transforms = [
+            "docc.python.transform",
+            "docc.mistletoe.transform",
+            "docc.html.transform",
+        ]
+        settings = Settings(
+            temp_dir,
+            {"tool": {"docc": {"transform": transforms}}},
+        )
+
+        result = load(settings)
+        # Compare the whole name sequence: a loop over ``result`` would
+        # pass without checking anything if load() returned too few entries.
+        assert [name for name, _ in result] == transforms
diff --git a/tests/test_verbatim.py
b/tests/test_verbatim.py new file mode 100644 index 0000000..d9302df --- /dev/null +++ b/tests/test_verbatim.py @@ -0,0 +1,570 @@ +# Copyright (C) 2025 Ethereum Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from io import StringIO +from pathlib import Path, PurePath +from typing import Any, List, Optional, Sequence, TextIO + +import pytest + +from docc.context import Context +from docc.document import BlankNode, Document +from docc.plugins.html import HTMLTag, TextNode +from docc.plugins.verbatim import ( + Fragment, + Highlight, + Line, + Pos, + Text, + Transcribe, + Transcribed, + Verbatim, + VerbatimVisitor, + _BoundsVisitor, +) +from docc.settings import PluginSettings, Settings +from docc.source import TextSource + + +class MockTextSource(TextSource): + _path: PurePath + + def __init__(self, content: str, path: Optional[PurePath] = None) -> None: + self._content = content + self._path = path if path is not None else PurePath("test.py") + + @property + def relative_path(self) -> Optional[PurePath]: + return self._path + + @property + def output_path(self) -> PurePath: + return self._path + + def open(self) -> TextIO: + return StringIO(self._content) + + +@pytest.fixture +def plugin_settings() -> PluginSettings: + settings = Settings(Path("."), {"tool": {"docc": {}}}) + return settings.for_plugin("docc.verbatim.transform") + + +class TestPos: + def test_create(self) -> None: + pos = 
Pos(line=1, column=5) + assert pos.line == 1 + assert pos.column == 5 + + def test_frozen(self) -> None: + pos = Pos(line=1, column=5) + with pytest.raises(AttributeError): + pos.line = 2 # pyre-ignore[41] + + def test_repr(self) -> None: + pos = Pos(line=10, column=20) + assert repr(pos) == "10:20" + + def test_ordering(self) -> None: + first_pos = Pos(line=1, column=0) + second_pos = Pos(line=1, column=5) + third_pos = Pos(line=2, column=0) + + assert first_pos < second_pos < third_pos + + def test_equality(self) -> None: + first_pos = Pos(line=1, column=5) + second_pos = Pos(line=1, column=5) + assert first_pos == second_pos + + +class TestText: + def test_create(self) -> None: + text = Text(text="hello") + assert text.text == "hello" + + def test_children_empty(self) -> None: + text = Text(text="hello") + assert text.children == () + + def test_replace_child_raises(self) -> None: + text = Text(text="hello") + with pytest.raises(TypeError): + text.replace_child(BlankNode(), BlankNode()) + + +class TestLine: + def test_create(self) -> None: + line = Line(number=1) + assert line.number == 1 + assert list(line.children) == [] + + def test_children_with_content(self) -> None: + line = Line(number=1, _children=[Text("hello")]) + children = list(line.children) + assert len(children) == 1 + assert isinstance(children[0], Text) + + def test_replace_child(self) -> None: + old = Text("old") + new = Text("new") + line = Line(number=1, _children=[old]) + + line.replace_child(old, new) + + children = list(line.children) + assert new in children + assert old not in children + + def test_repr(self) -> None: + line = Line(number=5) + assert "Line" in repr(line) + assert "5" in repr(line) + + +class TestHighlight: + def test_create(self) -> None: + highlight = Highlight(highlights=["keyword"]) + assert highlight.highlights == ["keyword"] + + def test_children_empty(self) -> None: + highlight = Highlight() + assert list(highlight.children) == [] + + def 
test_children_with_content(self) -> None: + text = Text("highlighted") + highlight = Highlight(_children=[text]) + assert text in highlight.children + + def test_replace_child(self) -> None: + old = Text("old") + new = Text("new") + highlight = Highlight(_children=[old]) + + highlight.replace_child(old, new) + + assert new in highlight.children + assert old not in highlight.children + + def test_repr(self) -> None: + highlight = Highlight(highlights=["keyword", "function"]) + result = repr(highlight) + assert "Highlight" in result + assert "keyword" in result + + +class TestTranscribed: + def test_create(self) -> None: + transcribed = Transcribed() + assert list(transcribed.children) == [] + + def test_children(self) -> None: + line = Line(number=1) + transcribed = Transcribed(_children=[line]) + assert line in transcribed.children + + def test_replace_child(self) -> None: + old = Line(number=1) + new = Line(number=2) + transcribed = Transcribed(_children=[old]) + + transcribed.replace_child(old, new) + + assert new in transcribed.children + assert old not in transcribed.children + + def test_repr(self) -> None: + transcribed = Transcribed() + assert repr(transcribed) == "Transcribed(...)" + + +class TestFragment: + def test_create(self) -> None: + start = Pos(line=1, column=0) + end = Pos(line=1, column=10) + fragment = Fragment(start, end) + + assert fragment.start == start + assert fragment.end == end + assert fragment.highlights == [] + + def test_create_with_highlights(self) -> None: + start = Pos(line=1, column=0) + end = Pos(line=1, column=10) + fragment = Fragment(start, end, highlights=["keyword"]) + + assert fragment.highlights == ["keyword"] + + def test_repr(self) -> None: + start = Pos(line=1, column=0) + end = Pos(line=1, column=10) + fragment = Fragment(start, end, highlights=["test"]) + + result = repr(fragment) + assert "Fragment" in result + assert "1:0" in result + assert "1:10" in result + + +class TestVerbatim: + def test_create(self) -> None: 
+ source = MockTextSource("content") + verbatim = Verbatim(source) + + assert verbatim.source is source + assert list(verbatim.children) == [] + + def test_repr(self) -> None: + source = MockTextSource("content") + verbatim = Verbatim(source) + + result = repr(verbatim) + assert "Verbatim" in result + + +class TestVerbatimNode: + def test_append(self) -> None: + source = MockTextSource("content") + verbatim = Verbatim(source) + fragment = Fragment(Pos(1, 0), Pos(1, 5)) + + verbatim.append(fragment) + + assert fragment in verbatim.children + + def test_append_nested_verbatim_raises(self) -> None: + source = MockTextSource("content") + outer = Verbatim(source) + inner = Verbatim(source) + + with pytest.raises(ValueError, match="cannot nest"): + outer.append(inner) + + def test_replace_child(self) -> None: + source = MockTextSource("content") + verbatim = Verbatim(source) + old = Fragment(Pos(1, 0), Pos(1, 5)) + new = Fragment(Pos(1, 0), Pos(1, 10)) + verbatim.append(old) + + verbatim.replace_child(old, new) + + assert new in verbatim.children + assert old not in verbatim.children + + +class TestBoundsVisitor: + def test_finds_start_end(self) -> None: + first_fragment = Fragment(Pos(1, 5), Pos(1, 10)) + second_fragment = Fragment(Pos(2, 0), Pos(2, 15)) + + source = MockTextSource("line1\nline2") + verbatim = Verbatim(source) + verbatim.append(first_fragment) + verbatim.append(second_fragment) + + visitor = _BoundsVisitor() + verbatim.visit(visitor) + + assert visitor.start == Pos(1, 5) + assert visitor.end == Pos(2, 15) + + def test_no_fragments(self) -> None: + visitor = _BoundsVisitor() + blank = BlankNode() + blank.visit(visitor) + + assert visitor.start is None + assert visitor.end is None + + +class ConcreteVerbatimVisitor(VerbatimVisitor): + lines: List[int] + texts: List[str] + highlights: List[Any] + + def __init__(self) -> None: + super().__init__() + self.lines = [] + self.texts = [] + self.highlights = [] + + def line(self, source: TextSource, line: int) -> 
None: + self.lines.append(line) + + def text(self, text: str) -> None: + self.texts.append(text) + + def begin_highlight(self, highlights: Sequence[str]) -> None: + self.highlights.append(("begin", list(highlights))) + + def end_highlight(self) -> None: + self.highlights.append(("end", None)) + + +class TestVerbatimVisitor: + def test_visit_verbatim_with_fragment(self) -> None: + source = MockTextSource("hello world") + verbatim = Verbatim(source) + fragment = Fragment(Pos(1, 0), Pos(1, 5), highlights=["keyword"]) + verbatim.append(fragment) + + visitor = ConcreteVerbatimVisitor() + verbatim.visit(visitor) + + assert 1 in visitor.lines + assert ("begin", ["keyword"]) in visitor.highlights + assert ("end", None) in visitor.highlights + + def test_visit_verbatim_multi_line_fragment(self) -> None: + source = MockTextSource("line one\nline two\nline three") + verbatim = Verbatim(source) + fragment = Fragment(Pos(1, 0), Pos(3, 10)) + verbatim.append(fragment) + + visitor = ConcreteVerbatimVisitor() + verbatim.visit(visitor) + + assert 1 in visitor.lines + assert 2 in visitor.lines + assert 3 in visitor.lines + + joined = "".join(visitor.texts) + assert "line one" in joined + assert "line two" in joined + assert "line three" in joined + + def test_visit_verbatim_multiple_fragments(self) -> None: + source = MockTextSource("alpha\nbeta\ngamma\ndelta") + verbatim = Verbatim(source) + first_fragment = Fragment(Pos(1, 0), Pos(2, 4)) + second_fragment = Fragment(Pos(3, 0), Pos(4, 5)) + verbatim.append(first_fragment) + verbatim.append(second_fragment) + + visitor = ConcreteVerbatimVisitor() + verbatim.visit(visitor) + + joined = "".join(visitor.texts) + assert "alpha" in joined + assert "beta" in joined + assert "gamma" in joined + assert "delta" in joined + + def test_nested_verbatim_raises(self) -> None: + # This test verifies the visitor's safety check against nested + # Verbatim. 
While Verbatim.append() prevents nesting at construction + # time, the visitor has an additional runtime check as defense-in- + # depth. We simulate already being inside a Verbatim by setting _depth. + source = MockTextSource("content") + outer = Verbatim(source) + + visitor = ConcreteVerbatimVisitor() + visitor._depth = 1 # Simulate already inside a Verbatim + + with pytest.raises(Exception, match="cannot be nested"): + visitor._enter_verbatim(outer) + + def test_fragment_outside_verbatim_raises(self) -> None: + fragment = Fragment(Pos(1, 0), Pos(1, 5)) + visitor = ConcreteVerbatimVisitor() + + with pytest.raises(Exception, match="must appear inside"): + visitor._enter_fragment(fragment) + + +class TestTranscribe: + def test_transform_simple(self, plugin_settings: PluginSettings) -> None: + source = MockTextSource("hello world") + verbatim = Verbatim(source) + fragment = Fragment(Pos(1, 0), Pos(1, 5)) + verbatim.append(fragment) + + document = Document(verbatim) + context = Context({Document: document}) + + transform = Transcribe(plugin_settings) + transform.transform(context) + + assert isinstance(document.root, Transcribed) + + def test_transcribe_multi_line( + self, plugin_settings: PluginSettings + ) -> None: + source = MockTextSource("line one\nline two\nline three") + verbatim = Verbatim(source) + fragment = Fragment(Pos(1, 0), Pos(3, 10)) + verbatim.append(fragment) + + document = Document(verbatim) + context = Context({Document: document}) + + transform = Transcribe(plugin_settings) + transform.transform(context) + + assert isinstance(document.root, Transcribed) + lines = [ + child + for child in document.root.children + if isinstance(child, Line) + ] + assert len(lines) == 3 + assert lines[0].number == 1 + assert lines[1].number == 2 + assert lines[2].number == 3 + + def _find_text(node: object) -> List[Text]: + found: List[Text] = [] + if isinstance(node, Text): + found.append(node) + if hasattr(node, "children"): + for child in node.children: # type: 
ignore[union-attr] + found.extend(_find_text(child)) + return found + + for line_node in lines: + text_nodes = _find_text(line_node) + assert len(text_nodes) > 0 + + def test_transform_no_verbatim( + self, plugin_settings: PluginSettings + ) -> None: + blank = BlankNode() + document = Document(blank) + context = Context({Document: document}) + + transform = Transcribe(plugin_settings) + transform.transform(context) + + assert document.root is blank + + +class TestVerbatimHtmlRendering: + """Tests for src/docc/plugins/verbatim/html.py render functions.""" + + def test_render_transcribed(self) -> None: + """render_transcribed produces an HTML table with class 'verbatim'.""" + from docc.plugins.verbatim.html import render_transcribed + + context = Context({}) + parent = HTMLTag("div") + node = Transcribed() + + result = render_transcribed(context, parent, node) + + assert isinstance(result, HTMLTag) + assert result.tag_name == "table" + assert result.attributes.get("class") == "verbatim" + # The table should be appended to parent + assert result in parent.children + + def test_render_line_inside_table(self) -> None: + """ + render_line produces a tr with th (line number) and td>pre + (code) when parent is a table. + """ + from docc.plugins.verbatim.html import render_line + + context = Context({}) + parent = HTMLTag("table") + node = Line(number=42) + + result = render_line(context, parent, node) + + # result should be the
 <pre> inside the <td>
+        assert isinstance(result, HTMLTag)
+        assert result.tag_name == "pre"
+
+        # Parent (table) should have a <tbody> child
+        assert len(list(parent.children)) == 1
+        tbody = list(parent.children)[0]
+        assert isinstance(tbody, HTMLTag)
+        assert tbody.tag_name == "tbody"
+
+        # tbody should contain a <tr>
+        tr = list(tbody.children)[0]
+        assert isinstance(tr, HTMLTag)
+        assert tr.tag_name == "tr"
+
+        # <tr> should contain <th> and <td>
+        tr_children = list(tr.children)
+        assert len(tr_children) == 2
+        th, td = tr_children
+        assert isinstance(th, HTMLTag)
+        assert th.tag_name == "th"
+        assert isinstance(td, HTMLTag)
+        assert td.tag_name == "td"
+
+        # <th> should contain TextNode with line number
+        th_children = list(th.children)
+        assert len(th_children) == 1
+        th_child = th_children[0]
+        assert isinstance(th_child, TextNode)
+        assert th_child._value == "42"
+
+    def test_render_line_outside_table(self) -> None:
+        """render_line appends <tr> directly to non-table parents."""
+        from docc.plugins.verbatim.html import render_line
+
+        context = Context({})
+        parent = HTMLTag("div")
+        node = Line(number=1)
+
+        result = render_line(context, parent, node)
+
+        assert isinstance(result, HTMLTag)
+        assert result.tag_name == "pre"
+        # Parent (div) should have <tr> directly (no <tbody>)
+        assert len(list(parent.children)) == 1
+        tr = list(parent.children)[0]
+        assert isinstance(tr, HTMLTag)
+        assert tr.tag_name == "tr"
+
+    def test_render_text(self) -> None:
+        """render_text appends a TextNode with correct content."""
+        from docc.plugins.verbatim.html import render_text
+
+        context = Context({})
+        parent = HTMLTag("pre")
+        node = Text(text="hello world")
+
+        result = render_text(context, parent, node)
+
+        assert result is None
+        children = list(parent.children)
+        assert len(children) == 1
+        child = children[0]
+        assert isinstance(child, TextNode)
+        assert child._value == "hello world"
+
+    def test_render_highlight(self) -> None:
+        """render_highlight produces <span> with hi-{name} hi classes."""
+        from docc.plugins.verbatim.html import render_highlight
+
+        context = Context({})
+        parent = HTMLTag("pre")
+        node = Highlight(highlights=["keyword", "function"])
+
+        result = render_highlight(context, parent, node)
+
+        assert isinstance(result, HTMLTag)
+        assert result.tag_name == "span"
+        classes = result.attributes.get("class") or ""
+        assert "hi-keyword" in classes
+        assert "hi-function" in classes
+        assert "hi" in classes.split()
+        # The span should be appended to parent
+        assert result in parent.children
diff --git a/whitelist.txt b/whitelist.txt
index 96390b2..82b28be 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -1,19 +1,35 @@
 asts
 autoescape
+autolink
+binop
+blockquote
+boldnew
+caplog
 casefold
 charref
 charrefs
+chdir
 checkable
+collector1
+collector2
+commonpath
+conftest
 copyfileobj
 ctx
+dasherize
 decl
 defs
 delitem
+dest
+doc1
+doc2
 docc
-commonpath
-dasherize
+docstrings
+docstrings1
+docstrings2
 endtag
 entityref
+ep
 eq
 ethereum
 etree
@@ -22,9 +38,14 @@ exc
 expr
 fullname
 func
-href
 getitem
 globbed
+h1
+h2
+h3
+hashable
+href
+img
 isabstract
 islice
 j2
@@ -39,12 +60,16 @@ matcher
 merchantability
 metaclass
 modulefinder
+names1
+names2
+ol
 param
 params
 pathname2url
 perf
 Pos
 ptag
+pyproject
 qualname
 removeprefix
 renderer
@@ -54,20 +79,34 @@ ret
 rvalue
 setext
 setitem
+source1
+source2
 src
 starttag
 stmt
+stringio
 strikethrough
 subclasses
+subclassing
+subdir
+subdirectory
+subtrees
 superclass
 tbody
+td1
+td2
 thead
 this'll
 toml
 tomli
+tooltip
 tostring
-traverser
 traversable
+traverser
 typeddict
+types1
+types2
+unittest
+unlink
 unresolve
 urlunsplit

From 3d700a3af9d6f82d691a66d2c73562b48f04dd13 Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 16:56:37 +0100
Subject: [PATCH 07/14] feat(perf): Cache entry_points() at module level in
 HTMLVisitor.

---
 pr.md                             | 28 +++++++++++++++
 src/docc/plugins/html/__init__.py | 17 +++++++--
 tests/test_html.py                | 58 +++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 3 deletions(-)
 create mode 100644 pr.md

diff --git a/pr.md b/pr.md
new file mode 100644
index 0000000..ebc05fc
--- /dev/null
+++ b/pr.md
@@ -0,0 +1,28 @@
+## What changed
+
+Cache the `entry_points(group="docc.plugins.html")` call at module level so it
+is computed once per process instead of once per `HTMLVisitor` instance. A new
+module-level variable `_HTML_ENTRY_POINTS` and a lazy-initializing helper
+`_get_html_entry_points()` replace the per-instance discovery in
+`HTMLVisitor.__init__`.
+
+## Why
+
+`HTMLVisitor` is instantiated once per document. On large projects (e.g.
+execution-specs with 2371 documents), the repeated `entry_points()` calls
+dominated the transform phase, accounting for the majority of a ~60 s runtime.
+Caching the result eliminates redundant package-metadata discovery and yields a
+significant speedup on the transform phase.
+
+## Test coverage
+
+Existing tests covered `HTMLVisitor` construction, rendering, and error paths
+but did not verify caching behavior. Four new tests were added to
+`tests/test_html.py` in the `TestEntryPointsCache` class:
+
+- **Behavioral tests (two):** Two `HTMLVisitor` instances share the same
+  (identity-equal) `entry_points` dict; both resolve a known entry point name.
+- **Call-count / spy test:** Patches `entry_points()` and verifies it is called
+  exactly once across three `HTMLVisitor` instantiations.
+- **Cache-keying test:** Verifies the cached dict maps string keys to
+  `EntryPoint` objects and contains the expected `docc.document:BlankNode` key.
diff --git a/src/docc/plugins/html/__init__.py b/src/docc/plugins/html/__init__.py
index f9b8c41..1edee49 100644
--- a/src/docc/plugins/html/__init__.py
+++ b/src/docc/plugins/html/__init__.py
@@ -76,6 +76,19 @@
     from importlib.metadata import EntryPoint, entry_points
 
 
+# Module-level cache for HTML renderer entry points
+_HTML_ENTRY_POINTS: Optional[Dict[str, EntryPoint]] = None
+
+
+def _get_html_entry_points() -> Dict[str, EntryPoint]:
+    """Get cached HTML renderer entry points."""
+    global _HTML_ENTRY_POINTS
+    if _HTML_ENTRY_POINTS is None:
+        found = entry_points(group="docc.plugins.html")
+        _HTML_ENTRY_POINTS = {entry.name: entry for entry in found}
+    return _HTML_ENTRY_POINTS
+
+
 RenderResult = Optional[Union["HTMLTag", "HTMLRoot"]]
 """
 Possible output from rendering to HTML.
@@ -424,9 +437,7 @@ class HTMLVisitor(Visitor):
     context: Context
 
     def __init__(self, context: Context) -> None:
-        # Discover render functions.
-        found = entry_points(group="docc.plugins.html")
-        self.entry_points = {entry.name: entry for entry in found}
+        self.entry_points = _get_html_entry_points()
         self.root = HTMLRoot(context)
         self.stack = [self.root]
         self.renderers = {}
diff --git a/tests/test_html.py b/tests/test_html.py
index 84b682e..053408a 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -21,6 +21,7 @@
 
 import pytest
 
+import docc.plugins.html as html_module
 from docc.context import Context
 from docc.document import BlankNode, Document, ListNode, Node, Visit
 from docc.plugins.html import (
@@ -34,6 +35,7 @@
     HTMLVisitor,
     TextNode,
     _ElementTreeVisitor,
+    _get_html_entry_points,
     _make_relative,
     blank_node,
     html_tag,
@@ -677,3 +679,59 @@ def test_no_definitions_raises(self) -> None:
 
         with pytest.raises(ReferenceError):
             render_reference(context, ref)
+
+
+class TestEntryPointsCache:
+    """Tests for the module-level caching optimization."""
+
+    def setup_method(self) -> None:  # noqa: SC200
+        """Reset the module-level cache before each test."""
+        html_module._HTML_ENTRY_POINTS = None
+
+    def teardown_method(self) -> None:  # noqa: SC200
+        """Reset the module-level cache after each test."""
+        html_module._HTML_ENTRY_POINTS = None
+
+    def test_two_visitors_share_entry_points(self) -> None:
+        """Two HTMLVisitor instances share the cached dict."""
+        context = Context({})
+        first = HTMLVisitor(context)
+        second = HTMLVisitor(context)
+
+        assert first.entry_points is second.entry_points
+        assert first.entry_points == second.entry_points
+
+    def test_both_visitors_resolve_known_key(self) -> None:
+        """Both visitors can resolve a known entry point."""
+        context = Context({})
+        first = HTMLVisitor(context)
+        second = HTMLVisitor(context)
+
+        key = "docc.document:BlankNode"
+        assert key in first.entry_points
+        assert key in second.entry_points
+
+    def test_single_call_for_multiple_visitors(  # noqa: SC200
+        self,
+    ) -> None:
+        """Only one entry_points() call across visitors."""
+        ep_path = "docc.plugins.html.entry_points"
+        with patch(ep_path, wraps=html_module.entry_points) as mock_ep:
+            HTMLVisitor(Context({}))
+            HTMLVisitor(Context({}))
+            HTMLVisitor(Context({}))
+
+            mock_ep.assert_called_once_with(group="docc.plugins.html")
+
+    def test_cached_dict_maps_names_to_objects(self) -> None:
+        """Cached dict maps names to EntryPoint objects."""
+        cached = _get_html_entry_points()
+
+        assert isinstance(cached, dict)
+        assert len(cached) > 0
+
+        for name, ep in cached.items():
+            assert isinstance(name, str)
+            assert isinstance(ep, html_module.EntryPoint)
+
+        assert "docc.document:BlankNode" in cached

From 5ef3f2f98e5e18784e7a70f53fc5189f5b5246af Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 17:02:45 +0100
Subject: [PATCH 08/14] feat(perf): Cache entry_points() at module level in
 Loader.

---
 src/docc/plugins/loader.py | 18 ++++++--
 tests/test_loader.py       | 91 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 5 deletions(-)

diff --git a/src/docc/plugins/loader.py b/src/docc/plugins/loader.py
index 98e8c7f..b4117b1 100644
--- a/src/docc/plugins/loader.py
+++ b/src/docc/plugins/loader.py
@@ -19,7 +19,7 @@
 
 import sys
 from inspect import isabstract
-from typing import Callable, Dict, Type, TypeVar
+from typing import Callable, Dict, Optional, Type, TypeVar
 
 if sys.version_info < (3, 10):
     from importlib_metadata import EntryPoint, entry_points
@@ -27,6 +27,19 @@
     from importlib.metadata import EntryPoint, entry_points
 
 
+# Module-level cache for entry_points() to avoid repeated look-ups
+_PLUGIN_ENTRY_POINTS: Optional[Dict[str, EntryPoint]] = None
+
+
+def _get_plugin_entry_points() -> Dict[str, EntryPoint]:
+    """Get cached plugin entry points, loading on first access."""
+    global _PLUGIN_ENTRY_POINTS
+    if _PLUGIN_ENTRY_POINTS is None:
+        found = set(entry_points(group="docc.plugins"))
+        _PLUGIN_ENTRY_POINTS = {entry.name: entry for entry in found}
+    return _PLUGIN_ENTRY_POINTS
+
+
 class PluginError(Exception):
     """
     An error encountered while loading a plugin.
@@ -47,8 +60,7 @@ def __init__(self) -> None:
         """
         Create an instance and populate it with the discovered plugins.
         """
-        found = set(entry_points(group="docc.plugins"))
-        self.entry_points = {entry.name: entry for entry in found}
+        self.entry_points = _get_plugin_entry_points()
 
     def load(self, base: Type[L], name: str) -> Callable[..., L]:
         """
diff --git a/tests/test_loader.py b/tests/test_loader.py
index 18625a2..df61f87 100644
--- a/tests/test_loader.py
+++ b/tests/test_loader.py
@@ -13,13 +13,20 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-from unittest.mock import MagicMock
+import sys
+from unittest.mock import MagicMock, patch
 
 import pytest
 
+if sys.version_info < (3, 10):
+    from importlib_metadata import EntryPoint
+else:
+    from importlib.metadata import EntryPoint
+
+import docc.plugins.loader as loader_module
 from docc.build import Builder
 from docc.discover import Discover
-from docc.plugins.loader import Loader, PluginError
+from docc.plugins.loader import Loader, PluginError, _get_plugin_entry_points
 from docc.transform import Transform
 
 
@@ -121,3 +128,83 @@ def test_load_multiple_discovers(self) -> None:
             cls = loader.load(Discover, name)
             assert isinstance(cls, type)
             assert issubclass(cls, Discover)
+
+
+class TestLoaderCacheBehavioral:
+    """Behavioral tests: two Loader instances share cached entry points."""
+
+    def test_two_loaders_share_identical_entry_points(self) -> None:
+        """Two Loader instances must have identical entry_points dicts."""
+        loader_a = Loader()
+        loader_b = Loader()
+        assert loader_a.entry_points is loader_b.entry_points
+
+    def test_both_loaders_can_load_known_plugin(self) -> None:
+        """Both Loader instances can load a known plugin after caching."""
+        loader_a = Loader()
+        loader_b = Loader()
+        cls_a = loader_a.load(Discover, "docc.python.discover")
+        cls_b = loader_b.load(Discover, "docc.python.discover")
+        assert cls_a is cls_b
+        assert callable(cls_a)
+
+
+class TestLoaderCacheCallCount:
+    """Spy tests: entry_points() called once across instances."""
+
+    def setup_method(self) -> None:  # noqa: SC200
+        """Reset the module-level cache before each test."""
+        loader_module._PLUGIN_ENTRY_POINTS = None
+
+    def teardown_method(self) -> None:  # noqa: SC200
+        """Reset the module-level cache after each test."""
+        loader_module._PLUGIN_ENTRY_POINTS = None
+
+    def test_entry_points_called_once_for_multiple_loaders(
+        self,
+    ) -> None:
+        """Multiple Loader instances call entry_points() once."""
+        with patch(
+            "docc.plugins.loader.entry_points",
+            wraps=loader_module.entry_points,
+        ) as mock_ep:
+            Loader()
+            Loader()
+            Loader()
+            mock_ep.assert_called_once_with(group="docc.plugins")
+
+    def test_get_plugin_entry_points_calls_once(self) -> None:
+        """Repeated _get_plugin_entry_points() calls entry_points() once."""
+        with patch(
+            "docc.plugins.loader.entry_points",
+            wraps=loader_module.entry_points,
+        ) as mock_ep:
+            _get_plugin_entry_points()
+            _get_plugin_entry_points()
+            mock_ep.assert_called_once_with(group="docc.plugins")
+
+
+class TestLoaderCacheKeying:
+    """Cache-keying tests: the cached dict has correct structure."""
+
+    def test_cache_maps_names_to_entry_point_objects(self) -> None:
+        """The cached dict maps string names to EntryPoint instances."""
+        loader = Loader()
+        for name, ep in loader.entry_points.items():
+            assert isinstance(name, str), "Keys must be strings."
+            assert isinstance(
+                ep, EntryPoint
+            ), "Values must be EntryPoint instances."
+
+    def test_cache_contains_known_entry_points(self) -> None:
+        """The cached dict includes known plugin entry point names."""
+        loader = Loader()
+        expected_names = [
+            "docc.python.discover",
+            "docc.python.build",
+            "docc.python.transform",
+        ]
+        for name in expected_names:
+            assert (
+                name in loader.entry_points
+            ), f"Expected entry point '{name}' not found in cache."

From 08e66708339dbf4fad226496c1b1380f04822ebf Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 17:01:51 +0100
Subject: [PATCH 09/14] feat(perf): Cache dataclasses.fields() at class level
 in PythonNode.

---
 src/docc/plugins/python/nodes.py |  27 ++++++-
 tests/test_python_cst.py         | 122 +++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+), 3 deletions(-)

diff --git a/src/docc/plugins/python/nodes.py b/src/docc/plugins/python/nodes.py
index e92c923..fd9bade 100644
--- a/src/docc/plugins/python/nodes.py
+++ b/src/docc/plugins/python/nodes.py
@@ -20,7 +20,16 @@
 import dataclasses
 import typing
 from dataclasses import dataclass, fields
-from typing import Iterable, Literal, Optional, Sequence, Union
+from typing import (
+    Any,
+    ClassVar,
+    Dict,
+    Iterable,
+    Literal,
+    Optional,
+    Sequence,
+    Union,
+)
 
 from docc.document import BlankNode, ListNode, Node, Visit, Visitor
 from docc.plugins.search import Content, Searchable
@@ -31,12 +40,24 @@ class PythonNode(Node):
     Base implementation of Node operations for Python nodes.
     """
 
+    # Class-level cache for dataclass fields (populated lazily)
+    _fields_cache: ClassVar[
+        Dict[type[Any], tuple[dataclasses.Field[Any], ...]]
+    ] = {}
+
+    @classmethod
+    def _get_fields(cls) -> tuple[dataclasses.Field[Any], ...]:
+        """Get cached dataclass fields for this class."""
+        if cls not in cls._fields_cache:
+            cls._fields_cache[cls] = tuple(fields(cls))
+        return cls._fields_cache[cls]
+
     @property
     def children(self) -> Iterable[Node]:
         """
         Child nodes belonging to this node.
         """
-        for field in fields(self):
+        for field in self._get_fields():
             value = getattr(self, field.name)
 
             if field.type == Node:
@@ -51,7 +72,7 @@ def replace_child(self, old: Node, new: Node) -> None:
         """
         Replace the old node with the given new node.
         """
-        for field in fields(self):
+        for field in self._get_fields():
             value = getattr(self, field.name)
             if value == old:
                 assert isinstance(new, field.type)
diff --git a/tests/test_python_cst.py b/tests/test_python_cst.py
index 91a060a..93afa22 100644
--- a/tests/test_python_cst.py
+++ b/tests/test_python_cst.py
@@ -13,10 +13,12 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import dataclasses
 import tempfile
 from collections.abc import Iterator
 from pathlib import Path, PurePath
 from typing import Dict, Mapping, Set
+from unittest.mock import patch
 
 import pytest
 
@@ -503,3 +505,123 @@ def test_children_raises_type_error_for_non_node_field(self) -> None:
 
         with pytest.raises(TypeError, match="child not Node"):
             list(module.children)
+
+
+class TestFieldsCacheBehavioral:
+    """Behavioral tests for children and replace_child with caching."""
+
+    def setup_method(self) -> None:
+        nodes.PythonNode._fields_cache.clear()
+
+    def test_module_children_yields_expected_nodes(self) -> None:
+        """Module.children should yield all three default child nodes."""
+        module = nodes.Module()
+        children = list(module.children)
+        assert len(children) == 3
+        assert children[0] is module.name
+        assert children[1] is module.docstring
+        assert children[2] is module.members
+
+    def test_function_children_yields_expected_nodes(self) -> None:
+        """Function.children should yield all Node-typed fields."""
+        func = nodes.Function(asynchronous=False)
+        children = list(func.children)
+        # Function has 6 Node fields: decorators, name, parameters,
+        # return_type, docstring, body.
+        assert len(children) == 6
+
+    def test_class_children_yields_expected_nodes(self) -> None:
+        """Class.children should yield all Node-typed fields."""
+        cls = nodes.Class()
+        children = list(cls.children)
+        # Class has 6 Node fields: decorators, name, bases, metaclass,
+        # docstring, members.
+        assert len(children) == 6
+
+    def test_replace_child_works_with_cache(self) -> None:
+        """replace_child should correctly swap a child node."""
+        module = nodes.Module()
+        old_name = module.name
+        new_name = nodes.Name("replaced")
+        module.replace_child(old_name, new_name)
+        assert module.name is new_name
+
+
+class TestFieldsCacheCallCount:
+    """Spy tests verifying fields() is called once per subclass."""
+
+    def setup_method(self) -> None:
+        nodes.PythonNode._fields_cache.clear()
+
+    def test_fields_called_once_for_multiple_module_instances(self) -> None:
+        """Multiple Module instances should trigger only one fields() call."""
+        with patch(
+            "docc.plugins.python.nodes.fields", wraps=dataclasses.fields
+        ) as mock_fields:
+            first = nodes.Module()
+            second = nodes.Module()
+            list(first.children)
+            list(second.children)
+            list(first.children)
+            mock_fields.assert_called_once()
+
+    def test_fields_called_once_per_distinct_subclass(self) -> None:
+        """Each distinct subclass should trigger exactly one fields() call."""
+        with patch(
+            "docc.plugins.python.nodes.fields", wraps=dataclasses.fields
+        ) as mock_fields:
+            module = nodes.Module()
+            func = nodes.Function(asynchronous=False)
+            list(module.children)
+            list(func.children)
+            assert mock_fields.call_count == 2
+
+    def test_replace_child_does_not_trigger_extra_fields_call(self) -> None:
+        """replace_child should reuse cached fields without extra calls."""
+        with patch(
+            "docc.plugins.python.nodes.fields", wraps=dataclasses.fields
+        ) as mock_fields:
+            module = nodes.Module()
+            list(module.children)
+            assert mock_fields.call_count == 1
+
+            old_name = module.name
+            module.replace_child(old_name, nodes.Name("new"))
+            # Still only one call; replace_child used the cache.
+            assert mock_fields.call_count == 1
+
+
+class TestFieldsCacheKeying:
+    """Tests verifying cache keys and stored value types."""
+
+    def setup_method(self) -> None:
+        nodes.PythonNode._fields_cache.clear()
+
+    def test_cache_populated_with_correct_keys(self) -> None:
+        """Cache should contain Module and Function type keys."""
+        module = nodes.Module()
+        func = nodes.Function(asynchronous=False)
+        list(module.children)
+        list(func.children)
+
+        assert nodes.Module in nodes.PythonNode._fields_cache
+        assert nodes.Function in nodes.PythonNode._fields_cache
+
+    def test_cached_values_are_tuples_of_field(self) -> None:
+        """Cached values should be tuples of dataclasses.Field objects."""
+        module = nodes.Module()
+        list(module.children)
+
+        cached = nodes.PythonNode._fields_cache[nodes.Module]
+        assert isinstance(cached, tuple)
+        for item in cached:
+            assert isinstance(item, dataclasses.Field)
+
+    def test_cache_empty_after_clear(self) -> None:
+        """Clearing the cache should remove all entries."""
+        module = nodes.Module()
+        list(module.children)
+        assert len(nodes.PythonNode._fields_cache) > 0
+
+        nodes.PythonNode._fields_cache.clear()
+        assert len(nodes.PythonNode._fields_cache) == 0

From c15b457318ac44b80987e33a983dc0d33d7193b7 Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 17:00:18 +0100
Subject: [PATCH 10/14] feat(perf): Cache _BoundsVisitor results to eliminate
 double tree traversal.

---
 src/docc/plugins/verbatim/__init__.py |  40 +++++---
 tests/test_verbatim.py                | 132 +++++++++++++++++++++++++-
 2 files changed, 159 insertions(+), 13 deletions(-)

diff --git a/src/docc/plugins/verbatim/__init__.py b/src/docc/plugins/verbatim/__init__.py
index f54d850..83fafbf 100644
--- a/src/docc/plugins/verbatim/__init__.py
+++ b/src/docc/plugins/verbatim/__init__.py
@@ -21,7 +21,16 @@
 import logging
 from abc import abstractmethod
 from dataclasses import dataclass, field
-from typing import Final, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import (
+    Dict,
+    Final,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
 
 from docc.context import Context
 from docc.document import Document, Node, Visit, Visitor
@@ -290,11 +299,22 @@ class VerbatimVisitor(Visitor):
 
     _depth: Optional[int]
     _verbatim: Optional[_VerbatimContext]
+    _bounds_cache: Dict[int, Tuple[Optional[Pos], Optional[Pos]]]
 
     def __init__(self) -> None:
         super().__init__()
         self._depth = None
         self._verbatim = None
+        self._bounds_cache = {}
+
+    def _get_bounds(self, node: Node) -> Tuple[Optional[Pos], Optional[Pos]]:
+        """Get cached bounds for a node, computing if necessary."""
+        node_id = id(node)
+        if node_id not in self._bounds_cache:
+            visitor = _BoundsVisitor()
+            node.visit(visitor)
+            self._bounds_cache[node_id] = (visitor.start, visitor.end)
+        return self._bounds_cache[node_id]
 
     #
     # Override in Subclasses:
@@ -434,11 +454,9 @@ def enter(self, node: Node) -> Visit:
         elif isinstance(node, Verbatim):
             return self._enter_verbatim(node)
         else:
-            # TODO: Save the results somewhere so we don't visit twice.
-            visitor = _BoundsVisitor()
-            node.visit(visitor)
-            if visitor.start is not None:
-                self._copy(visitor.start.line, visitor.start)
+            start, _ = self._get_bounds(node)
+            if start is not None:
+                self._copy(start.line, start)
             return self.enter_node(node)
 
     def exit(self, node: Node) -> None:
@@ -450,12 +468,10 @@ def exit(self, node: Node) -> None:
         elif isinstance(node, Verbatim):
             return self._exit_verbatim(node)
         else:
-            # TODO: Save the results somewhere so we don't visit twice.
-            visitor = _BoundsVisitor()
-            node.visit(visitor)
-            if visitor.end is not None:
-                start = visitor.start or visitor.end
-                self._copy(start.line, visitor.end)
+            start, end = self._get_bounds(node)
+            if end is not None:
+                start_pos = start or end
+                self._copy(start_pos.line, end)
             return self.exit_node(node)
 
 
diff --git a/tests/test_verbatim.py b/tests/test_verbatim.py
index d9302df..11bd038 100644
--- a/tests/test_verbatim.py
+++ b/tests/test_verbatim.py
@@ -16,11 +16,12 @@
 from io import StringIO
 from pathlib import Path, PurePath
 from typing import Any, List, Optional, Sequence, TextIO
+from unittest.mock import patch
 
 import pytest
 
 from docc.context import Context
-from docc.document import BlankNode, Document
+from docc.document import BlankNode, Document, ListNode
 from docc.plugins.html import HTMLTag, TextNode
 from docc.plugins.verbatim import (
     Fragment,
@@ -568,3 +569,132 @@ def test_render_highlight(self) -> None:
         assert "hi" in classes.split()
         # The span should be appended to parent
         assert result in parent.children
+
+
+class TestBoundsCacheBehavior:
+    """Caching _BoundsVisitor results must not change output."""
+
+    def test_cached_bounds_produce_same_text_output(self) -> None:
+        """Caching does not change text output for wrapped Fragments."""
+        source = MockTextSource("alpha\nbeta\ngamma")
+        verbatim = Verbatim(source)
+
+        wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 4))])
+        verbatim.append(wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+        verbatim.visit(visitor)
+
+        joined = "".join(visitor.texts)
+        assert "alpha" in joined
+        assert "beta" in joined
+
+    def test_multiple_wrappers_produce_correct_output(self) -> None:
+        """Multiple wrappers with Fragments produce correct text."""
+        source = MockTextSource("aaa\nbbb\nccc\nddd")
+        verbatim = Verbatim(source)
+
+        first_wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 3))])
+        second_wrapper = ListNode([Fragment(Pos(3, 0), Pos(4, 3))])
+        verbatim.append(first_wrapper)
+        verbatim.append(second_wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+        verbatim.visit(visitor)
+
+        joined = "".join(visitor.texts)
+        assert "aaa" in joined
+        assert "bbb" in joined
+        assert "ccc" in joined
+        assert "ddd" in joined
+
+
+class TestBoundsCacheCallCount:
+    """The cache must eliminate redundant _BoundsVisitor creation."""
+
+    def test_single_wrapper_visited_once(self) -> None:
+        """Enter and exit of one node creates only one _BoundsVisitor."""
+        source = MockTextSource("hello\nworld")
+        verbatim = Verbatim(source)
+
+        wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 5))])
+        verbatim.append(wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+
+        with patch(
+            "docc.plugins.verbatim._BoundsVisitor",
+            wraps=_BoundsVisitor,
+        ) as mock_cls:
+            verbatim.visit(visitor)
+            # One wrapper node -> one _BoundsVisitor creation (not two).
+            assert mock_cls.call_count == 1
+
+    def test_two_wrappers_visited_once_each(self) -> None:
+        """Two distinct nodes each create exactly one _BoundsVisitor."""
+        source = MockTextSource("aaa\nbbb\nccc\nddd")
+        verbatim = Verbatim(source)
+
+        first_wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 3))])
+        second_wrapper = ListNode([Fragment(Pos(3, 0), Pos(4, 3))])
+        verbatim.append(first_wrapper)
+        verbatim.append(second_wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+
+        with patch(
+            "docc.plugins.verbatim._BoundsVisitor",
+            wraps=_BoundsVisitor,
+        ) as mock_cls:
+            verbatim.visit(visitor)
+            # Two unique wrappers -> exactly two creations.
+            assert mock_cls.call_count == 2
+
+
+class TestBoundsCacheKeying:
+    """The _bounds_cache dictionary must be keyed by id(node)."""
+
+    def test_cache_contains_wrapper_ids(self) -> None:
+        """Cache has entries keyed by id(node) for each wrapper node."""
+        source = MockTextSource("hello\nworld")
+        verbatim = Verbatim(source)
+
+        wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 5))])
+        verbatim.append(wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+        verbatim.visit(visitor)
+
+        assert id(wrapper) in visitor._bounds_cache
+
+    def test_cache_values_are_start_end_tuples(self) -> None:
+        """Cached values must be (start, end) tuples of Optional[Pos]."""
+        source = MockTextSource("hello\nworld")
+        verbatim = Verbatim(source)
+
+        wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 5))])
+        verbatim.append(wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+        verbatim.visit(visitor)
+
+        start, end = visitor._bounds_cache[id(wrapper)]
+        assert start == Pos(1, 0)
+        assert end == Pos(2, 5)
+
+    def test_cache_has_entry_per_wrapper(self) -> None:
+        """Each non-Fragment/non-Verbatim node gets its own cache entry."""
+        source = MockTextSource("aaa\nbbb\nccc\nddd")
+        verbatim = Verbatim(source)
+
+        first_wrapper = ListNode([Fragment(Pos(1, 0), Pos(2, 3))])
+        second_wrapper = ListNode([Fragment(Pos(3, 0), Pos(4, 3))])
+        verbatim.append(first_wrapper)
+        verbatim.append(second_wrapper)
+
+        visitor = ConcreteVerbatimVisitor()
+        verbatim.visit(visitor)
+
+        assert id(first_wrapper) in visitor._bounds_cache
+        assert id(second_wrapper) in visitor._bounds_cache
+        assert len(visitor._bounds_cache) == 2

From d95868148940712c115c2c33a102c15ca22793b6 Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 16:57:20 +0100
Subject: [PATCH 11/14] feat(perf): Cache file lines in TextSource.line() to
 avoid repeated file reads.

---
 src/docc/source.py   | 27 +++++++++++++++++----------
 tests/test_source.py | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/docc/source.py b/src/docc/source.py
index 6e0dd38..110921c 100644
--- a/src/docc/source.py
+++ b/src/docc/source.py
@@ -19,7 +19,7 @@
 
 from abc import ABC, abstractmethod
 from pathlib import PurePath
-from typing import Optional, TextIO
+from typing import List, Optional, TextIO
 
 
 class Source(ABC):
@@ -62,22 +62,29 @@ class TextSource(Source):
     A Source that supports reading text snippets.
     """
 
+    _lines_cache: Optional[List[str]] = None
+
     @abstractmethod
     def open(self) -> TextIO:
         """
         Open the source for reading.
         """
 
+    def _get_lines(self) -> List[str]:
+        """Get cached lines, loading from file on first access."""
+        if self._lines_cache is None:
+            with self.open() as f:
+                self._lines_cache = f.read().split("\n")
+        return self._lines_cache
+
     def line(self, number: int) -> str:
         """
         Extract a line of text from the source.
         """
-        # TODO: Don't reopen and reread the file every time...
-        with self.open() as f:
-            lines = f.read().split("\n")
-            try:
-                return lines[number - 1]
-            except IndexError as e:
-                raise IndexError(
-                    f"line {number} out of range for `{self.relative_path}`"
-                ) from e
+        lines = self._get_lines()
+        try:
+            return lines[number - 1]
+        except IndexError as e:
+            raise IndexError(
+                f"line {number} out of range for `{self.relative_path}`"
+            ) from e
diff --git a/tests/test_source.py b/tests/test_source.py
index e61ce25..4579ed0 100644
--- a/tests/test_source.py
+++ b/tests/test_source.py
@@ -17,6 +17,7 @@
 from io import StringIO
 from pathlib import Path, PurePath
 from typing import Optional, TextIO
+from unittest.mock import patch
 
 import pytest
 
@@ -182,3 +183,40 @@ def open(self) -> TextIO:
             assert source.line(2) == "# Line 2"
         finally:
             path.unlink()
+
+
+class TestTextSourceCache:
+    def test_open_called_once_for_multiple_line_calls(self) -> None:
+        """Calling line() multiple times should only open the file once."""
+        content = "line1\nline2\nline3"
+        source = ConcreteTextSource(content)
+
+        with patch.object(source, "open", wraps=source.open) as mock_open:
+            source.line(1)
+            source.line(2)
+            source.line(3)
+
+            mock_open.assert_called_once()
+
+    def test_lines_cache_matches_content_split(self) -> None:
+        """After calling line(), _lines_cache matches content split."""
+        content = "alpha\nbeta\ngamma"
+        source = ConcreteTextSource(content)
+
+        source.line(1)
+
+        assert source._lines_cache == ["alpha", "beta", "gamma"]
+
+    def test_lines_cache_empty_content(self) -> None:
+        """Cache for empty content should be a list with one empty string."""
+        source = ConcreteTextSource("")
+
+        source.line(1)
+
+        assert source._lines_cache == [""]
+
+    def test_lines_cache_is_none_before_first_call(self) -> None:
+        """Before any line() call, the cache should be None."""
+        source = ConcreteTextSource("some content")
+
+        assert source._lines_cache is None

From 76406a9d2c8541079d5fb8d99cd13a42963b2cfb Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 17:02:52 +0100
Subject: [PATCH 12/14] feat(perf): Cache Jinja2 environments in HTML and
 listing plugins.

---
 pr.md                                | 28 -----------------
 src/docc/plugins/html/__init__.py    | 36 +++++++++++++++-------
 src/docc/plugins/listing/__init__.py | 21 +++++++++----
 tests/test_html.py                   | 45 ++++++++++++++++++++++++++++
 tests/test_listing.py                | 27 +++++++++++++++++
 whitelist.txt                        |  3 ++
 6 files changed, 116 insertions(+), 44 deletions(-)
 delete mode 100644 pr.md

diff --git a/pr.md b/pr.md
deleted file mode 100644
index ebc05fc..0000000
--- a/pr.md
+++ /dev/null
@@ -1,28 +0,0 @@
-## What changed
-
-Cache the `entry_points(group="docc.plugins.html")` call at module level so it
-is computed once per process instead of once per `HTMLVisitor` instance. A new
-module-level variable `_HTML_ENTRY_POINTS` and a lazy-initializing helper
-`_get_html_entry_points()` replace the per-instance discovery in
-`HTMLVisitor.__init__`.
-
-## Why
-
-`HTMLVisitor` is instantiated once per document. On large projects (e.g.
-execution-specs with 2371 documents), the repeated `entry_points()` calls
-dominated the transform phase, accounting for the majority of a ~60 s runtime.
-Caching the result eliminates redundant package-metadata discovery and yields a
-significant speedup on the transform phase.
-
-## Test coverage
-
-Existing tests covered `HTMLVisitor` construction, rendering, and error paths
-but did not verify caching behavior. Four new tests were added to
-`tests/test_html.py` in the `TestEntryPointsCache` class:
-
-- **Behavioral test:** Two `HTMLVisitor` instances share the same (identity-equal)
-  `entry_points` dict and can both resolve known entry point names.
-- **Call-count / spy test:** Patches `entry_points()` and verifies it is called
-  exactly once across three `HTMLVisitor` instantiations.
-- **Cache-keying test:** Verifies the cached dict maps string keys to
-  `EntryPoint` objects and contains the expected `docc.document:BlankNode` key.
diff --git a/src/docc/plugins/html/__init__.py b/src/docc/plugins/html/__init__.py
index 1edee49..c174ed4 100644
--- a/src/docc/plugins/html/__init__.py
+++ b/src/docc/plugins/html/__init__.py
@@ -89,6 +89,28 @@ def _get_html_entry_points() -> Dict[str, EntryPoint]:
     return _HTML_ENTRY_POINTS
 
 
+# Module-level cache for Jinja2 environments
+_JINJA_ENVS: Dict[str, Environment] = {}
+
+
+def _get_jinja_env(
+    package: str, with_reference_extension: bool = False
+) -> Environment:
+    """Get cached Jinja2 environment for a package."""
+    cache_key = f"{package}:{with_reference_extension}"
+    if cache_key not in _JINJA_ENVS:
+        extensions = [_ReferenceExtension] if with_reference_extension else []
+        env = Environment(
+            extensions=extensions,
+            loader=PackageLoader(package),
+            autoescape=select_autoescape(),
+        )
+        env.filters["html"] = _html_filter
+        env.filters["find"] = _find_filter
+        _JINJA_ENVS[cache_key] = env
+    return _JINJA_ENVS[cache_key]
+
+
 RenderResult = Optional[Union["HTMLTag", "HTMLRoot"]]
 """
 Possible output from rendering to HTML.
@@ -328,10 +350,8 @@ def output(self, context: Context, destination: TextIOBase) -> None:
             markup = ET.tostring(element, encoding="unicode", method="html")
             rendered.write(markup)
 
-        env = Environment(
-            extensions=[_ReferenceExtension],
-            loader=PackageLoader("docc.plugins.html"),
-            autoescape=select_autoescape(),
+        env = _get_jinja_env(
+            "docc.plugins.html", with_reference_extension=True
         )
         template = env.get_template("base.html")
         body = rendered.getvalue()
@@ -751,13 +771,7 @@ def render_template(
     Render a template as a child of the given parent.
     """
     static_path = _static_path_from(context)
-    env = Environment(
-        extensions=[_ReferenceExtension],
-        loader=PackageLoader(package),
-        autoescape=select_autoescape(),
-    )
-    env.filters["html"] = _html_filter
-    env.filters["find"] = _find_filter
+    env = _get_jinja_env(package, with_reference_extension=True)
     template = env.get_template(template_name)
     parser = HTMLParser(context)
     parser.feed(
diff --git a/src/docc/plugins/listing/__init__.py b/src/docc/plugins/listing/__init__.py
index 1aeb808..39014b9 100644
--- a/src/docc/plugins/listing/__init__.py
+++ b/src/docc/plugins/listing/__init__.py
@@ -20,7 +20,7 @@
 from abc import ABC, abstractmethod
 from os.path import commonpath
 from pathlib import PurePath
-from typing import Dict, Final, FrozenSet, Iterator, Set, Tuple
+from typing import Dict, Final, FrozenSet, Iterator, Optional, Set, Tuple
 
 from jinja2 import Environment, PackageLoader, select_autoescape
 
@@ -32,6 +32,20 @@
 from docc.settings import PluginSettings
 from docc.source import Source
 
+# Module-level cache for Jinja2 environment
+_LISTING_ENV: Optional[Environment] = None
+
+
+def _get_listing_env() -> Environment:
+    """Get cached Jinja2 environment for listing templates."""
+    global _LISTING_ENV
+    if _LISTING_ENV is None:
+        _LISTING_ENV = Environment(
+            loader=PackageLoader("docc.plugins.listing"),
+            autoescape=select_autoescape(),
+        )
+    return _LISTING_ENV
+
 
 class Listable(ABC):
     """
@@ -207,10 +221,7 @@ def render_html(
 
     entries.sort()
 
-    env = Environment(
-        loader=PackageLoader("docc.plugins.listing"),
-        autoescape=select_autoescape(),
-    )
+    env = _get_listing_env()
     template = env.get_template("listing.html")
     parser = html.HTMLParser(context)
     parser.feed(template.render(context=context, entries=entries))
diff --git a/tests/test_html.py b/tests/test_html.py
index 053408a..a206953 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -20,11 +20,13 @@
 from unittest.mock import patch
 
 import pytest
+from jinja2 import Environment
 
 import docc.plugins.html as html_module
 from docc.context import Context
 from docc.document import BlankNode, Document, ListNode, Node, Visit
 from docc.plugins.html import (
+    _JINJA_ENVS,
     HTML,
     HTMLContext,
     HTMLDiscover,
@@ -36,6 +38,7 @@
     TextNode,
     _ElementTreeVisitor,
     _get_html_entry_points,
+    _get_jinja_env,
     _make_relative,
     blank_node,
     html_tag,
@@ -735,3 +738,45 @@ def test_cached_dict_maps_names_to_objects(self) -> None:
             assert isinstance(ep, html_module.EntryPoint)
 
         assert "docc.document:BlankNode" in cached
+
+
+class TestJinjaEnvCache:
+    def setup_method(self) -> None:
+        """Clear the module-level cache before each test."""
+        _JINJA_ENVS.clear()
+
+    def test_returns_environment_with_filters_and_extensions(self) -> None:
+        """Verify _get_jinja_env returns an env with filters."""
+        env = _get_jinja_env(
+            "docc.plugins.html", with_reference_extension=True
+        )
+        assert isinstance(env, Environment)
+        assert "html" in env.filters
+        assert "find" in env.filters
+        assert len(env.extensions) > 0
+
+    def test_same_args_reuse_single_environment(self) -> None:
+        """Two calls with the same arguments create only one Environment."""
+        with patch("docc.plugins.html.PackageLoader") as mock_loader:
+            env1 = _get_jinja_env(
+                "docc.plugins.html", with_reference_extension=True
+            )
+            env2 = _get_jinja_env(
+                "docc.plugins.html", with_reference_extension=True
+            )
+            assert env1 is env2
+            assert mock_loader.call_count == 1
+
+    def test_cache_keys_for_different_args(self) -> None:
+        """Different package/extension combos produce distinct keys."""
+        _get_jinja_env("docc.plugins.html", with_reference_extension=True)
+        _get_jinja_env("docc.plugins.html", with_reference_extension=False)
+        _get_jinja_env(
+            "docc.plugins.listing",
+            with_reference_extension=True,
+        )
+
+        assert "docc.plugins.html:True" in _JINJA_ENVS
+        assert "docc.plugins.html:False" in _JINJA_ENVS
+        assert "docc.plugins.listing:True" in _JINJA_ENVS
+        assert len(_JINJA_ENVS) == 3
diff --git a/tests/test_listing.py b/tests/test_listing.py
index 1aefd22..209d478 100644
--- a/tests/test_listing.py
+++ b/tests/test_listing.py
@@ -18,7 +18,9 @@
 from typing import Dict, Iterator, List, Optional, Set
 
 import pytest
+from jinja2 import Environment
 
+import docc.plugins.listing as listing_module
 from docc.context import Context
 from docc.document import BlankNode, Document
 from docc.plugins.html import HTMLTag
@@ -28,6 +30,7 @@
     ListingDiscover,
     ListingNode,
     ListingSource,
+    _get_listing_env,
     render_html,
 )
 from docc.settings import PluginSettings, Settings
@@ -340,3 +343,27 @@ def find_tags(node: object, tag_name: str) -> List[HTMLTag]:
         assert href.endswith(
             ".html"
         ), f"Link href should end with .html, got: {href}"
+
+
+class TestListingEnvCache:
+    def setup_method(self) -> None:
+        """Reset the module-level cache before each test."""
+        listing_module._LISTING_ENV = None
+
+    def test_returns_environment_with_correct_loader(self) -> None:
+        """Verify _get_listing_env returns an Environment with a loader."""
+        env = _get_listing_env()
+        assert isinstance(env, Environment)
+        assert env.loader is not None
+
+    def test_two_calls_return_same_object(self) -> None:
+        """Two calls to _get_listing_env return the exact same object."""
+        env1 = _get_listing_env()
+        env2 = _get_listing_env()
+        assert env1 is env2
+
+    def test_cache_is_populated_after_call(self) -> None:
+        """Verify the module-level cache is populated after a call."""
+        assert listing_module._LISTING_ENV is None
+        _get_listing_env()
+        assert listing_module._LISTING_ENV is not None
diff --git a/whitelist.txt b/whitelist.txt
index 82b28be..a56fe2a 100644
--- a/whitelist.txt
+++ b/whitelist.txt
@@ -29,6 +29,9 @@ docstrings1
 docstrings2
 endtag
 entityref
+env1
+env2
+envs
 ep
 eq
 ethereum

From 35837a1ef95b8eea9ca2769975e0e13d53b56608 Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 16:55:27 +0100
Subject: [PATCH 13/14] feat(perf): Share loaded renderers across HTMLVisitor
 instances.

---
 pr.md                             |  16 ++++
 src/docc/plugins/html/__init__.py |   4 +-
 tests/test_html_renderer_cache.py | 129 ++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 pr.md
 create mode 100644 tests/test_html_renderer_cache.py

diff --git a/pr.md b/pr.md
new file mode 100644
index 0000000..8c0221b
--- /dev/null
+++ b/pr.md
@@ -0,0 +1,16 @@
+## What changed
+
+Moved the `renderers` dictionary from a per-instance attribute on `HTMLVisitor` to a module-level shared cache (`_LOADED_RENDERERS`). Each `HTMLVisitor` instance now references the same dictionary, so `EntryPoint.load()` is called at most once per node type across all visitors rather than once per visitor per node type.
+
+## Why
+
+`HTMLVisitor` is instantiated once per document during the HTML transform phase. Previously, each instance maintained its own `renderers` dict and independently called `EntryPoint.load()` for every node type it encountered. For projects with many documents, this meant redundant entry point loading on every single document, making the transform phase unnecessarily slow.
+
+## Test coverage
+
+- **Existing tests:** `test_html.py` covered `HTMLVisitor.enter()` behavior, renderer error paths, and stack management, but did not verify cross-instance renderer sharing.
+- **What was missing:** No tests confirmed that renderer loading was cached across multiple `HTMLVisitor` instances, that the cache was keyed correctly by `Type[Node]`, or that `EntryPoint.load()` was only called once per node type.
+- **What was added:** `tests/test_html_renderer_cache.py` with three test classes:
+  - **Behavioral tests:** Verify that visiting a `BlankNode` resolves the correct renderer and produces expected stack output.
+  - **Call-count tests:** Use mock entry points to assert `EntryPoint.load()` is called exactly once when two separate visitors visit the same node type.
+  - **Cache-keying tests:** Verify that `_LOADED_RENDERERS` entries are keyed by `Type[Node]` subclasses with callable values, and that all `HTMLVisitor` instances reference the same shared dictionary object.
diff --git a/src/docc/plugins/html/__init__.py b/src/docc/plugins/html/__init__.py
index c174ed4..8fd19fe 100644
--- a/src/docc/plugins/html/__init__.py
+++ b/src/docc/plugins/html/__init__.py
@@ -75,6 +75,8 @@
 else:
     from importlib.metadata import EntryPoint, entry_points
 
+_LOADED_RENDERERS: Dict[Type[Node], Callable[..., object]] = {}
+
 
 # Module-level cache for HTML renderer entry points
 _HTML_ENTRY_POINTS: Optional[Dict[str, EntryPoint]] = None
@@ -460,7 +462,7 @@ def __init__(self, context: Context) -> None:
         self.entry_points = _get_html_entry_points()
         self.root = HTMLRoot(context)
         self.stack = [self.root]
-        self.renderers = {}
+        self.renderers = _LOADED_RENDERERS
         self.context = context
 
     def _renderer(self, node: Node) -> Callable[..., object]:
diff --git a/tests/test_html_renderer_cache.py b/tests/test_html_renderer_cache.py
new file mode 100644
index 0000000..dfc4fcd
--- /dev/null
+++ b/tests/test_html_renderer_cache.py
@@ -0,0 +1,129 @@
+# Copyright (C) 2025 Ethereum Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+"""Tests for the shared renderer cache in the HTML plugin."""
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from docc.context import Context
+from docc.document import BlankNode, Node, Visit
+from docc.plugins.html import (
+    _LOADED_RENDERERS,
+    HTMLVisitor,
+)
+
+
+@pytest.fixture(autouse=True)  # noqa: SC200
+def _clear_renderer_cache() -> None:
+    _LOADED_RENDERERS.clear()
+
+
+class TestRendererCacheBehavioral:
+    """Verify that HTMLVisitor resolves renderers and produces output."""
+
+    def test_visitor_resolves_blank_node_renderer(self) -> None:
+        context = Context({})
+        visitor = HTMLVisitor(context)
+        blank = BlankNode()
+
+        result = visitor.enter(blank)
+
+        assert result == Visit.SkipChildren
+
+    def test_visitor_produces_expected_output_for_blank_node(self) -> None:
+        context = Context({})
+        visitor = HTMLVisitor(context)
+        blank = BlankNode()
+
+        initial_stack_len = len(visitor.stack)
+        visitor.enter(blank)
+
+        assert len(visitor.stack) == initial_stack_len + 1
+        assert isinstance(visitor.stack[-1], BlankNode)
+
+
+class TestRendererCacheCallCount:
+    """Verify that EntryPoint.load() is called at most once per node type."""
+
+    def test_load_called_once_for_two_visitors(self) -> None:
+        """Two visitors rendering one node type call load() only once."""
+        mock_renderer = MagicMock(return_value=None)
+        mock_entry_point = MagicMock()
+        mock_entry_point.load.return_value = mock_renderer
+
+        context = Context({})
+        first_visitor = HTMLVisitor(context)
+        second_visitor = HTMLVisitor(context)
+        key = "docc.document:BlankNode"
+
+        # HTMLVisitor.entry_points is the module-level shared dict, so
+        # rebind both visitors to a private copy instead of mutating it.
+        patched = {**first_visitor.entry_points, key: mock_entry_point}
+        first_visitor.entry_points = second_visitor.entry_points = patched
+
+        # First visitor triggers load.
+        first_visitor.enter(BlankNode())
+        assert mock_entry_point.load.call_count == 1
+        # Second visitor reuses the cache; load is not called again.
+        second_visitor.enter(BlankNode())
+        assert mock_entry_point.load.call_count == 1
+
+    def test_load_called_once_for_same_visitor_twice(self) -> None:
+        """One visitor rendering a node type twice calls load() once."""
+        mock_renderer = MagicMock(return_value=None)
+        mock_entry_point = MagicMock()
+        mock_entry_point.load.return_value = mock_renderer
+
+        context = Context({})
+        visitor = HTMLVisitor(context)
+        key = "docc.document:BlankNode"
+        # Rebind to a private copy; entry_points is shared module-wide.
+        visitor.entry_points = {**visitor.entry_points, key: mock_entry_point}
+
+        visitor.enter(BlankNode())
+        # Pop the stack entry added by enter() so we can call enter() again.
+        visitor.stack.pop()
+        visitor.enter(BlankNode())
+        assert mock_entry_point.load.call_count == 1
+
+
+class TestRendererCacheKeying:
+    """Verify that the shared cache is keyed by Type[Node] subclasses."""
+
+    def test_cache_contains_blank_node_after_visit(self) -> None:
+        context = Context({})
+        visitor = HTMLVisitor(context)
+        visitor.enter(BlankNode())
+
+        assert BlankNode in _LOADED_RENDERERS
+
+    def test_cache_values_are_callable(self) -> None:
+        context = Context({})
+        visitor = HTMLVisitor(context)
+        visitor.enter(BlankNode())
+
+        for key, value in _LOADED_RENDERERS.items():
+            assert issubclass(key, Node), f"Key {key} is not a Node subclass."
+            assert callable(value), f"Value for {key} is not callable."
+
+    def test_visitors_share_same_renderers_dict(self) -> None:
+        context = Context({})
+        first_visitor = HTMLVisitor(context)
+        second_visitor = HTMLVisitor(context)
+
+        assert first_visitor.renderers is second_visitor.renderers
+        assert first_visitor.renderers is _LOADED_RENDERERS

From 45b27bf1a22b8d6f69ea7fbaf9620695aa777b22 Mon Sep 17 00:00:00 2001
From: danceratopz 
Date: Tue, 24 Feb 2026 17:37:54 +0100
Subject: [PATCH 14/14] Fix test pollution of shared renderer cache

The test_enter_returning_tag_pushes_and_traverses test was directly
assigning a fake renderer into visitor.renderers, which with the shared
_LOADED_RENDERERS cache now persists across all subsequent HTMLVisitor
instances. This caused test_definition_to_html_output to use the fake
ListNode renderer instead of the real one, producing empty HTML output.

Wrap the fake renderer injection in patch.dict() so it is automatically
cleaned up after the test completes.
---
 pr.md              | 16 ----------------
 tests/test_html.py | 11 +++++------
 2 files changed, 5 insertions(+), 22 deletions(-)
 delete mode 100644 pr.md

diff --git a/pr.md b/pr.md
deleted file mode 100644
index 8c0221b..0000000
--- a/pr.md
+++ /dev/null
@@ -1,16 +0,0 @@
-## What changed
-
-Moved the `renderers` dictionary from a per-instance attribute on `HTMLVisitor` to a module-level shared cache (`_LOADED_RENDERERS`). Each `HTMLVisitor` instance now references the same dictionary, so `EntryPoint.load()` is called at most once per node type across all visitors rather than once per visitor per node type.
-
-## Why
-
-`HTMLVisitor` is instantiated once per document during the HTML transform phase. Previously, each instance maintained its own `renderers` dict and independently called `EntryPoint.load()` for every node type it encountered. For projects with many documents, this meant redundant entry point loading on every single document, making the transform phase unnecessarily slow.
-
-## Test coverage
-
-- **Existing tests:** `test_html.py` covered `HTMLVisitor.enter()` behavior, renderer error paths, and stack management, but did not verify cross-instance renderer sharing.
-- **What was missing:** No tests confirmed that renderer loading was cached across multiple `HTMLVisitor` instances, that the cache was keyed correctly by `Type[Node]`, or that `EntryPoint.load()` was only called once per node type.
-- **What was added:** `tests/test_html_renderer_cache.py` with three test classes:
-  - **Behavioral tests:** Verify that visiting a `BlankNode` resolves the correct renderer and produces expected stack output.
-  - **Call-count tests:** Use mock entry points to assert `EntryPoint.load()` is called exactly once when two separate visitors visit the same node type.
-  - **Cache-keying tests:** Verify that `_LOADED_RENDERERS` entries are keyed by `Type[Node]` subclasses with callable values, and that all `HTMLVisitor` instances reference the same shared dictionary object.
diff --git a/tests/test_html.py b/tests/test_html.py
index a206953..30dde52 100644
--- a/tests/test_html.py
+++ b/tests/test_html.py
@@ -610,13 +610,12 @@ def fake_renderer(
         ) -> HTMLTag:
             return result_tag
 
-        visitor.renderers[ListNode] = fake_renderer
+        with patch.dict(visitor.renderers, {ListNode: fake_renderer}):
+            node = ListNode()
+            result = visitor.enter(node)
 
-        node = ListNode()
-        result = visitor.enter(node)
-
-        assert result == Visit.TraverseChildren
-        assert visitor.stack[-1] is result_tag
+            assert result == Visit.TraverseChildren
+            assert visitor.stack[-1] is result_tag
 
 
 class TestRenderReference: