180 changes: 180 additions & 0 deletions tests/test_e2e_issue_452_preprocess_performance.py
@@ -0,0 +1,180 @@
"""
E2E Test for Issue #452: O(n²) complexity in _scan_risky_placeholders causes 100-250x slowdown

This test exercises the full CLI path from `pdd generate` to verify that the preprocessing
step demonstrates quadratic performance degradation on large prompt files with many placeholders.

The bug: The _scan_risky_placeholders() function in preprocess.py has O(n²) complexity due to
repeatedly counting newlines from the start of the file for every placeholder. This causes
severe performance degradation on large prompt files (5000+ lines).

User-facing impact:
- `pdd generate large_prompt.prompt` takes 60-240s instead of <1s on files with 5000+ lines
- `pdd sync` operations multiply the delay (5 attempts = 5+ minutes of preprocessing)
- CI/CD pipelines timeout
- Developer iteration completely broken

This E2E test:
1. Creates a 5000-line prompt file with placeholders (~1 per 3 lines)
2. Runs `pdd generate` on it through Click's CliRunner, with LLM calls mocked
3. Measures wall-clock time for the full generate operation
4. Asserts the run stays within a budget that the O(n) fix meets comfortably
   but the O(n²) implementation exceeds by an order of magnitude
   (the 2k/4k/8k scaling measurement lives in tests/test_preprocess.py)

The test should FAIL on buggy code (quadratic scaling) and PASS once the fix is applied.

Issue: https://github.com/promptdriven/pdd/issues/452
"""

import time
import pytest
from pathlib import Path
from unittest.mock import patch, MagicMock
from click.testing import CliRunner


@pytest.fixture(autouse=True)
def set_pdd_path(monkeypatch):
"""Set PDD_PATH to the pdd package directory for all tests in this module.

This is required because construct_paths uses PDD_PATH to find the language_format.csv
file for language detection.
"""
import pdd
pdd_package_dir = Path(pdd.__file__).parent
monkeypatch.setenv("PDD_PATH", str(pdd_package_dir))


@pytest.mark.e2e
class TestPreprocessPerformanceE2E:
"""
E2E tests for Issue #452: Verify that large prompt files with many placeholders
take excessive time to preprocess due to O(n²) complexity.
"""

def _create_large_prompt(self, path: Path, num_lines: int) -> None:
"""
Create a large prompt file with placeholders distributed throughout.

The file structure mimics real-world architecture prompts:
- Module definitions with placeholders like {module_name}
- Description text between placeholders
- Blank lines for readability
        - Approximately 1 placeholder per 3 lines (realistic density)

Args:
path: Path where the prompt file should be written
num_lines: Target number of lines (actual will be close due to template)
"""
content_lines = []
content_lines.append("% Task: Generate a microservices architecture")
content_lines.append("")
content_lines.append("You are an expert software architect. Generate a comprehensive")
content_lines.append("microservices architecture with the following modules:")
content_lines.append("")

        # Each iteration adds 3 lines: placeholder line, description line, blank line.
        # This gives us roughly one placeholder per 3 lines (realistic density).
        num_modules = num_lines // 3

for i in range(num_modules):
content_lines.append(f"Module {i}: {{module_{i}}}")
content_lines.append(f"Description: Service handling business logic for module {i}")
content_lines.append("")

content_lines.append("")
content_lines.append("% Output Requirements")
content_lines.append("- Follow best practices for microservices")
content_lines.append("- Include error handling")
content_lines.append("")

path.write_text("\n".join(content_lines))

def test_generate_completes_on_large_prompt_file_issue_452(
self, tmp_path, monkeypatch
):
"""
E2E Test: `pdd generate` successfully processes large prompt files.

This test verifies that `pdd generate` can process a large prompt file
(5000+ lines) that would trigger the O(n²) performance issue.

User-facing impact:
- With the bug: This file size takes 30-60+ seconds to process
- After fix: This file size should take <1 second to process

        The test asserts a deliberately generous wall-clock budget rather than
        an exact timing (test environments vary): the fixed O(n) path finishes
        in well under a second, while the O(n²) path overshoots the budget by
        an order of magnitude. Either way it exercises the full code path that
        users hit when running `pdd generate` on large files.
"""
monkeypatch.chdir(tmp_path)

# Force local execution
monkeypatch.setenv("PDD_FORCE_LOCAL", "1")
monkeypatch.setenv("OPENAI_API_KEY", "fake-openai-key-for-testing")

# Create a large prompt file (5000 lines) - the scale where users report issues
prompt_file = tmp_path / "large_architecture.prompt"
self._create_large_prompt(prompt_file, 5000)

output_file = tmp_path / "output.py"

# Mock LLM calls to avoid real API calls
def mock_completion(*args, **kwargs):
"""Mock that returns immediately with a simple response."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = 'def generated_code():\n pass'
mock_response.choices[0].finish_reason = "stop"
mock_response.model = "gpt-4o-mini"
mock_response.usage = MagicMock()
mock_response.usage.prompt_tokens = 10
mock_response.usage.completion_tokens = 5
mock_response.usage.total_tokens = 15
return mock_response

def mock_postprocess(code, *args, **kwargs):
"""Mock postprocess to return immediately."""
return (code, 0.0, 'mock-model')

# Run the generate command
start_time = time.perf_counter()

with patch('pdd.llm_invoke.litellm.completion', side_effect=mock_completion):
with patch('pdd.llm_invoke._LAST_CALLBACK_DATA', {"cost": 0.0, "input_tokens": 10, "output_tokens": 5}):
with patch('pdd.code_generator.postprocess', side_effect=mock_postprocess):
from pdd import cli
runner = CliRunner()
result = runner.invoke(cli.cli, [
"--local",
"generate",
str(prompt_file),
"--output", str(output_file)
], catch_exceptions=False)

end_time = time.perf_counter()
elapsed = end_time - start_time

# Document the timing - the key observation for the bug
# With the bug: Would take 30-60+ seconds (or at least >5s in test environment)
# After fix: Should take <1 second
print(f"\nLarge file (5000 lines) processing time: {elapsed:.2f}s")
print(f"With bug: Expected >5s (test environment) or 30-60+ seconds (production)")
print(f"After fix: Expected <1 second")

        # THE BUG ASSERTION: with the O(n²) bug, a 5000-line file takes >5 seconds
        # even in a test environment. After the fix, it should take well under that,
        # so the test fails on buggy code and passes on fixed code.
        assert elapsed < 5.0, (
            f"BUG DETECTED (Issue #452): Large prompt file preprocessing is slow!\n\n"
            f"Processing a 5000-line prompt file took {elapsed:.2f}s\n"
            f"Expected: >5s with the bug (indicates O(n²) complexity)\n"
            f"          <2s after fix (indicates O(n) complexity)\n\n"
            f"This demonstrates the user-facing impact:\n"
            f"- Users running 'pdd generate' on architecture specs wait 30-60+ seconds\n"
            f"- CI/CD pipelines time out\n"
            f"- 'pdd sync' with multiple attempts becomes unusable\n\n"
            f"Root cause: _scan_risky_placeholders() at pdd/preprocess.py:101 and :106\n"
            f"uses text.count('\\n', 0, m.start()) inside a loop, causing O(n²) complexity."
        )

        # The command may or may not succeed depending on the mocking setup;
        # the key check is the timing assertion above, which catches the
        # quadratic regression regardless of the command's exit status.
147 changes: 147 additions & 0 deletions tests/test_preprocess.py
@@ -2036,3 +2036,150 @@ def test_pdd_tags_survive_format() -> None:
assert '{"name": "User"' in formatted
assert '<pdd-interface>' in formatted
assert '</pdd-interface>' in formatted


# ============================================================================
# Tests for Issue #452: O(n²) performance bug in _scan_risky_placeholders
# ============================================================================

def test_scan_risky_placeholders_performance_issue_452():
"""
Test for Issue #452: Detect O(n²) complexity in _scan_risky_placeholders.

This test generates files of increasing size and measures the time taken
to scan for risky placeholders. The current implementation uses
text.count("\\n", 0, m.start()) inside loops, which causes O(n²) complexity.

Expected behavior:
- For linear O(n) algorithm: doubling file size should ~double execution time
- For quadratic O(n²) algorithm: doubling file size should ~4x execution time

This test will FAIL on the buggy code (showing >3x scaling) and PASS on
the fixed code (showing <2.5x scaling).
"""
import time
from pdd.preprocess import _scan_risky_placeholders

def generate_test_text(num_lines: int) -> str:
"""Generate text with placeholders distributed throughout."""
lines = []
for i in range(num_lines):
# Add a placeholder every 10 lines to ensure we have matches
if i % 10 == 0:
lines.append(f"Line {i}: {{placeholder_{i}}}")
else:
lines.append(f"Line {i}: regular text content here")
return "\n".join(lines)

def measure_time(num_lines: int, iterations: int = 3) -> float:
"""Measure average time to scan placeholders."""
text = generate_test_text(num_lines)
times = []
for _ in range(iterations):
start = time.perf_counter()
_scan_risky_placeholders(text)
elapsed = time.perf_counter() - start
times.append(elapsed)
return sum(times) / len(times)

# Measure performance at different file sizes
# Using smaller sizes to keep test fast, but still detect quadratic behavior
time_2k = measure_time(2000)
time_4k = measure_time(4000)
time_8k = measure_time(8000)

# Calculate scaling ratios
ratio_2k_to_4k = time_4k / time_2k if time_2k > 0 else 0
ratio_4k_to_8k = time_8k / time_4k if time_4k > 0 else 0

# For O(n) algorithm: ratio should be ~2x (linear scaling)
# For O(n²) algorithm: ratio should be ~4x (quadratic scaling)
# We use 2.5x as the threshold: above this indicates quadratic behavior
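    # Worked out: if T(n) = c·n, then T(2n)/T(n) = 2; if T(n) = c·n², then
    # T(2n)/T(n) = (2n)²/n² = 4. The 2.5x cutoff sits between the two regimes,
    # leaving headroom for timer noise on the linear side.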

# This assertion will FAIL on buggy code (ratio > 3.0)
# and PASS on fixed code (ratio < 2.5)
assert ratio_2k_to_4k < 2.5, (
f"Performance degradation detected! Doubling from 2k to 4k lines "
f"caused {ratio_2k_to_4k:.2f}x slowdown (expected <2.5x for linear). "
f"Times: 2k={time_2k:.4f}s, 4k={time_4k:.4f}s, 8k={time_8k:.4f}s"
)

assert ratio_4k_to_8k < 2.5, (
f"Performance degradation detected! Doubling from 4k to 8k lines "
f"caused {ratio_4k_to_8k:.2f}x slowdown (expected <2.5x for linear). "
f"Times: 2k={time_2k:.4f}s, 4k={time_4k:.4f}s, 8k={time_8k:.4f}s"
)


def test_scan_risky_placeholders_correctness_large_file_issue_452():
"""
Test for Issue #452: Verify line numbers are correct on large files.

This ensures that any performance fix maintains correctness of line
number calculation for placeholders in large files.
"""
from pdd.preprocess import _scan_risky_placeholders

# Generate a large file with known placeholder positions
lines = []
expected_placeholders = []

for i in range(5000): # 5000 lines (0-indexed loop, 1-indexed lines)
line_num = i + 1 # Convert to 1-indexed line number
if line_num % 100 == 0: # Placeholder every 100 lines
lines.append(f"Line {line_num}: {{placeholder_{line_num}}}")
expected_placeholders.append((line_num, f"{{placeholder_{line_num}}}"))
else:
lines.append(f"Line {line_num}: regular content")

text = "\n".join(lines)
single_brace, template_brace = _scan_risky_placeholders(text)

# Verify we found all expected placeholders
assert len(single_brace) == len(expected_placeholders), (
f"Expected {len(expected_placeholders)} placeholders, found {len(single_brace)}"
)

# Verify line numbers are accurate
for (actual_line, actual_snippet), (expected_line, expected_snippet) in zip(
single_brace, expected_placeholders
):
assert actual_line == expected_line, (
f"Placeholder at line {expected_line} was reported at line {actual_line}"
)
assert actual_snippet == expected_snippet, (
f"Expected snippet {expected_snippet}, got {actual_snippet}"
)


def test_scan_risky_placeholders_edge_cases_issue_452():
"""
Test for Issue #452: Verify edge cases still work correctly.

Tests empty files, files with no placeholders, and files with code fences.
"""
from pdd.preprocess import _scan_risky_placeholders

# Test 1: Empty text
single_brace, template_brace = _scan_risky_placeholders("")
assert single_brace == []
assert template_brace == []

# Test 2: Large file with no placeholders
text_no_placeholders = "\n".join([f"Line {i}" for i in range(1000)])
single_brace, template_brace = _scan_risky_placeholders(text_no_placeholders)
assert single_brace == []
assert template_brace == []

# Test 3: Placeholders inside code fences should be ignored
text_with_fence = """Line 1
Line 2
```
{ignored_placeholder}
```
Line 6: {detected_placeholder}
"""
single_brace, template_brace = _scan_risky_placeholders(text_with_fence)
assert len(single_brace) == 1
assert single_brace[0][0] == 6 # Line 6
assert single_brace[0][1] == "{detected_placeholder}"
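
    # Note: fence skipping is compatible with a single O(n) pass -- e.g.
    # toggling an "in fence" flag on each line that starts with ``` and
    # discarding matches while the flag is set. That is one possible approach,
    # not necessarily how preprocess.py implements it.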