From a4d722e20b1a1f81f34b646084fc38ea8170f7d9 Mon Sep 17 00:00:00 2001
From: James Levine
Date: Thu, 15 Jan 2026 22:21:06 -0800
Subject: [PATCH 1/3] Ensure tests run in the pdd conda environment updated
 with the .[dev] requirements from the current worktree

- Added a phony target to do the update; made the appropriate test targets
  depend on it.
---
 Makefile | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 83e4d980a..eb4b05ddb 100644
--- a/Makefile
+++ b/Makefile
@@ -101,7 +101,7 @@ TEST_OUTPUTS := $(patsubst $(PDD_DIR)/%.py,$(TESTS_DIR)/test_%.py,$(PY_OUTPUTS))
 
 # All Example files in context directory (recursive)
 EXAMPLE_FILES := $(shell find $(CONTEXT_DIR) -name "*_example.py" 2>/dev/null)
 
-.PHONY: all clean test requirements production coverage staging regression sync-regression all-regression cloud-regression install build analysis fix crash update update-extension generate run-examples verify detect change lint publish publish-public publish-public-cap public-ensure public-update public-import public-diff sync-public
+.PHONY: all clean test requirements production coverage staging regression sync-regression all-regression cloud-regression install build analysis fix crash update update-extension generate run-examples verify detect change lint publish publish-public publish-public-cap public-ensure public-update public-import public-diff sync-public ensure-dev-deps
 
 all: $(PY_OUTPUTS) $(MAKEFILE_OUTPUT) $(CSV_OUTPUTS) $(EXAMPLE_OUTPUTS) $(TEST_OUTPUTS)
 
@@ -190,20 +190,25 @@ run-examples: $(EXAMPLE_FILES)
 	)
 	@echo "All examples ran successfully"
 
+# Ensure dev dependencies are installed before running tests
+ensure-dev-deps:
+	@echo "Updating pdd conda environment with dev dependencies"
+	@conda run -n pdd --no-capture-output pip install -e '.[dev]'
+
 # Run tests
-test:
+test: ensure-dev-deps
 	@echo "Running staging tests"
 	@cd $(STAGING_DIR)
 	@conda run -n pdd --no-capture-output PDD_RUN_REAL_LLM_TESTS=1 PDD_RUN_LLM_TESTS=1 PDD_PATH=$(abspath $(PDD_DIR)) PYTHONPATH=$(PDD_DIR):$$PYTHONPATH python -m pytest -vv -n auto $(TESTS_DIR)
 
 # Run tests with coverage
-coverage:
+coverage: ensure-dev-deps
 	@echo "Running tests with coverage"
 	@cd $(STAGING_DIR)
 	@conda run -n pdd --no-capture-output PDD_PATH=$(STAGING_DIR) PYTHONPATH=$(PDD_DIR):$$PYTHONPATH python -m pytest --cov=$(PDD_DIR) --cov-report=term-missing --cov-report=html $(TESTS_DIR)
 
 # Run pylint
-lint:
+lint: ensure-dev-deps
 	@echo "Running pylint"
 	@conda run -n pdd --no-capture-output pylint pdd tests
 
@@ -467,7 +472,7 @@ production:
 	@cp -r $(TESTS_DIR) .
 	@cp $(MAKEFILE_OUTPUT) .
 
-regression:
+regression: ensure-dev-deps
 	@echo "Running regression tests"
 	@mkdir -p staging/regression
 	@find staging/regression -type f ! -name ".*" -delete
@@ -480,7 +485,7 @@ endif
 
 SYNC_PARALLEL ?= 1
 
-sync-regression:
+sync-regression: ensure-dev-deps
 	@echo "Running sync regression tests"
 ifdef TEST_NUM
 	@echo "Running specific sync test: $(TEST_NUM)"
@@ -495,10 +500,10 @@ else
 endif
 endif
 
-all-regression: regression sync-regression cloud-regression
+all-regression: ensure-dev-deps regression sync-regression cloud-regression
 	@echo "All regression test suites completed."
 
-cloud-regression:
+cloud-regression: ensure-dev-deps
 	@echo "Running cloud regression tests"
 	@mkdir -p staging/cloud_regression
 ifdef TEST_NUM
@@ -510,7 +515,7 @@ endif
 
 # Automated test runner with Infisical for CI/CD
 .PHONY: test-all-ci
-test-all-ci:
+test-all-ci: ensure-dev-deps
 	@echo "Running all test suites with result capture for CI/CD"
 	@mkdir -p test_results
 ifdef PR_NUMBER
@@ -525,7 +530,7 @@ endif
 
 # Run all tests with Infisical (for local development and CI)
 .PHONY: test-all-with-infisical
-test-all-with-infisical:
+test-all-with-infisical: ensure-dev-deps
 	@echo "Running all test suites with Infisical"
 	@if ! command -v infisical &> /dev/null; then \
 		echo "Error: Infisical CLI not found. Please install it:"; \

From 066189e4eb73bbe28af68f424852c7daa6b35e8e Mon Sep 17 00:00:00 2001
From: James Levine
Date: Sun, 18 Jan 2026 17:47:27 -0800
Subject: [PATCH 2/3] Remove redundant ensure-dev-deps from all-regression
 target

Address Copilot review feedback: the ensure-dev-deps dependency is redundant
on all-regression since regression, sync-regression, and cloud-regression
already depend on it.

Co-Authored-By: Claude Opus 4.5
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index eb4b05ddb..fd284e9be 100644
--- a/Makefile
+++ b/Makefile
@@ -500,7 +500,7 @@ else
 endif
 endif
 
-all-regression: ensure-dev-deps regression sync-regression cloud-regression
+all-regression: regression sync-regression cloud-regression
 	@echo "All regression test suites completed."
 
 cloud-regression: ensure-dev-deps

From 74671bf7ca8e3d4c0d61c67c0864c7df8431f146 Mon Sep 17 00:00:00 2001
From: James Levine
Date: Mon, 19 Jan 2026 17:28:45 -0800
Subject: [PATCH 3/3] Add failing tests for issue #154: file overwrite silently
 skipped

This commit adds unit tests and E2E tests that reproduce the bug where
"generation overwrite a file doesn't always happen". The tests verify that
the code should raise an error when output_path is empty or None, rather
than silently skipping the file write.

Unit tests (tests/test_code_generator_main.py):
- test_issue_154_empty_output_path_should_raise_error
- test_issue_154_none_output_path_should_raise_error
- test_issue_154_llm_should_not_be_called_with_invalid_output_path

E2E tests (tests/test_e2e_issue_154_overwrite.py):
- test_generate_fails_when_output_path_is_empty
- test_generate_fails_when_output_path_is_empty_string
- test_generate_succeeds_when_output_path_is_valid
- test_no_overwrite_prompt_when_output_path_invalid

These tests currently FAIL on the buggy code and will pass once the fix is
implemented at pdd/code_generator_main.py:1087.
Fixes #154

Co-Authored-By: Claude Opus 4.5
---
 tests/test_code_generator_main.py     | 142 ++++++++++
 tests/test_e2e_issue_154_overwrite.py | 359 ++++++++++++++++++++++++++
 2 files changed, 501 insertions(+)
 create mode 100644 tests/test_e2e_issue_154_overwrite.py

diff --git a/tests/test_code_generator_main.py b/tests/test_code_generator_main.py
index 5933fe1f6..83700f95b 100644
--- a/tests/test_code_generator_main.py
+++ b/tests/test_code_generator_main.py
@@ -2223,3 +2223,145 @@ def test_json_serialization_after_unwrap(self):
 
         expected = '[\n  {\n    "key": "value"\n  }\n]'
         assert generated_code_content == expected
+
+
+# =============================================================================
+# Issue #154 Regression Tests: File Overwrite Not Happening
+# =============================================================================
+#
+# These tests verify the bug where "generation overwrite a file doesn't always
+# happen". The root cause is an architectural disconnect between:
+#   1. construct_paths()     - handles overwrite confirmation
+#   2. code_generator_main() - handles the actual file write at line 1087
+#
+# When generate_output_paths() returns {} (e.g., due to an empty basename),
+# output_path becomes falsy and the write block is skipped silently.
+#
+# TEST STRATEGY:
+# These tests assert the EXPECTED FIXED behavior, so they will FAIL on the
+# current buggy code and PASS once the bug is fixed.
+# =============================================================================
+
+
+class TestIssue154OverwriteNotHappening:
+    """
+    Regression tests for GitHub Issue #154: file overwrite not always happening.
+
+    The bug manifests when:
+    1. construct_paths() returns empty/falsy output_file_paths
+    2. The LLM successfully generates code
+    3. The file write block at line 1087 is silently skipped due to `if output_path:`
+
+    These tests assert the EXPECTED behavior after the fix, so they will
+    FAIL on the current buggy code.
+    """
+
+    def test_issue_154_empty_output_path_should_raise_error(
+        self, mock_ctx, temp_dir_setup, mock_construct_paths_fixture,
+        mock_local_generator_fixture, mock_rich_console_fixture, mock_env_vars
+    ):
+        """
+        FAILING TEST for issue #154: Empty output_path should raise an error.
+
+        Current behavior (BUG): Code generates, write is silently skipped.
+        Expected behavior (FIX): Should raise click.UsageError when output_path is empty.
+
+        This test FAILS on the current buggy code because no error is raised.
+        """
+        mock_ctx.obj['local'] = True
+        mock_ctx.obj['quiet'] = False
+        prompt_file_path = temp_dir_setup["prompts_dir"] / "issue154_prompt.prompt"
+        create_file(prompt_file_path, "Test prompt for issue 154")
+
+        # Simulate what happens when generate_output_paths returns {}
+        # due to empty basename - output_file_paths.get("output") returns ""
+        mock_construct_paths_fixture.return_value = (
+            {},  # resolved_config
+            {"prompt_file": "Test prompt for issue 154"},
+            {"output": ""},  # Empty string - falsy output path (triggers the bug)
+            "python"
+        )
+
+        # EXPECTED FIX: Should raise click.UsageError when output_path is empty
+        # CURRENT BUG: No error raised, write is silently skipped
+        with pytest.raises(click.UsageError) as excinfo:
+            code_generator_main(
+                mock_ctx, str(prompt_file_path), "", None, False
+            )
+
+        # Verify the error message is helpful
+        assert "output" in str(excinfo.value).lower() or "path" in str(excinfo.value).lower(), \
+            "Error message should mention output path issue"
+
+    def test_issue_154_none_output_path_should_raise_error(
+        self, mock_ctx, temp_dir_setup, mock_construct_paths_fixture,
+        mock_local_generator_fixture, mock_rich_console_fixture, mock_env_vars
+    ):
+        """
+        FAILING TEST for issue #154: None output_path should raise an error.
+
+        When construct_paths returns None for the output path (e.g., when
+        generate_output_paths returns {} and .get("output") returns None),
+        an error should be raised instead of silently skipping the write.
+
+        This test FAILS on the current buggy code.
+        """
+        mock_ctx.obj['local'] = True
+        mock_ctx.obj['quiet'] = False
+        prompt_file_path = temp_dir_setup["prompts_dir"] / "issue154_none_prompt.prompt"
+        create_file(prompt_file_path, "Test prompt for issue 154 - None case")
+
+        # Simulate output_file_paths.get("output") returning None
+        mock_construct_paths_fixture.return_value = (
+            {},  # resolved_config
+            {"prompt_file": "Test prompt for issue 154 - None case"},
+            {"output": None},  # None output path - triggers the bug
+            "python"
+        )
+
+        # EXPECTED FIX: Should raise click.UsageError when output_path is None
+        # CURRENT BUG: No error, write silently skipped, console shows warning
+        with pytest.raises(click.UsageError) as excinfo:
+            code_generator_main(
+                mock_ctx, str(prompt_file_path), None, None, False
+            )
+
+        assert "output" in str(excinfo.value).lower() or "path" in str(excinfo.value).lower()
+
+    def test_issue_154_llm_should_not_be_called_with_invalid_output_path(
+        self, mock_ctx, temp_dir_setup, mock_construct_paths_fixture,
+        mock_local_generator_fixture, mock_env_vars
+    ):
+        """
+        FAILING TEST for issue #154: LLM should NOT be called when output_path is invalid.
+
+        The architectural bug is that output_path validation happens AFTER the
+        expensive LLM call. The LLM is called (costing money/time) even when
+        output_path is invalid/empty.
+
+        Expected fix: Validate output_path BEFORE calling the LLM.
+        This test FAILS on the current buggy code because the LLM IS called.
+        """
+        mock_ctx.obj['local'] = True
+        prompt_file_path = temp_dir_setup["prompts_dir"] / "issue154_llm_call.prompt"
+        create_file(prompt_file_path, "Test prompt")
+
+        # Empty output path - simulates generate_output_paths returning {}
+        mock_construct_paths_fixture.return_value = (
+            {},
+            {"prompt_file": "Test prompt"},
+            {"output": ""},  # Empty - should be caught BEFORE the LLM call
+            "python"
+        )
+
+        # Expect an error to be raised BEFORE the LLM is called
+        try:
+            code_generator_main(mock_ctx, str(prompt_file_path), "", None, False)
+        except click.UsageError:
+            pass  # This is the expected behavior after the fix
+
+        # EXPECTED FIX: LLM should NOT be called when output_path is invalid
+        # CURRENT BUG: LLM IS called, wasting resources
+        assert not mock_local_generator_fixture.called, \
+            "LLM should NOT be called when output_path is empty/invalid. " \
+            "The output path should be validated BEFORE expensive LLM operations."
diff --git a/tests/test_e2e_issue_154_overwrite.py b/tests/test_e2e_issue_154_overwrite.py
new file mode 100644
index 000000000..dcf03a4a8
--- /dev/null
+++ b/tests/test_e2e_issue_154_overwrite.py
@@ -0,0 +1,359 @@
+"""
+E2E tests for Issue #154: "generation overwrite a file doesn't always happen".
+
+These tests exercise the full CLI path from `pdd generate` to verify that when
+output path resolution fails (returns empty/None), the command either:
+1. Raises an error BEFORE calling the expensive LLM, or
+2. At minimum, returns a non-zero exit code.
+
+The bug: when generate_output_paths returns an empty dict (due to an empty
+basename), output_file_paths.get("output") returns None/empty, and the file
+write is silently skipped at code_generator_main.py:1087. The user may have
+confirmed the overwrite, the LLM is called (costing money), but the file is
+never written.
+
+Each E2E test:
+1. Sets up a temp project with a prompt file
+2. Runs the generate command through Click's CliRunner
+3. Mocks construct_paths to return a falsy output path (simulating the effect
+   of generate_output_paths returning an empty dict)
+4. Verifies the command fails with an error instead of silently skipping
+
+The tests should FAIL on buggy code (silent skip, exit code 0) and PASS once fixed.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Dict, Any
+from unittest.mock import patch, MagicMock
+
+import pytest
+from click.testing import CliRunner
+
+
+@pytest.fixture(autouse=True)
+def set_pdd_path(monkeypatch):
+    """Set PDD_PATH to the pdd package directory for all tests in this module."""
+    import pdd
+    pdd_package_dir = Path(pdd.__file__).parent
+    monkeypatch.setenv("PDD_PATH", str(pdd_package_dir))
+
+
+class TestIssue154OverwriteE2E:
+    """
+    E2E tests for Issue #154: verify that file generation fails explicitly
+    when output path resolution fails, rather than silently skipping the write.
+
+    These tests use Click's CliRunner to exercise the full CLI path.
+    """
+
+    @pytest.fixture
+    def project_dir(self, tmp_path: Path) -> Path:
+        """Create a minimal PDD project structure."""
+        (tmp_path / "prompts").mkdir()
+        (tmp_path / "src").mkdir()
+        return tmp_path
+
+    def test_generate_fails_when_output_path_is_empty(
+        self, project_dir: Path, monkeypatch
+    ):
+        """
+        E2E Test: `pdd generate` should fail explicitly when the output path
+        cannot be resolved, NOT silently skip the file write.
+
+        This test simulates the bug trigger by mocking construct_paths to
+        return a falsy output path, which is what happens downstream when
+        generate_output_paths returns an empty dict and
+        output_file_paths.get("output") returns None.
+
+        Expected behavior (after fix):
+        - Command should fail with non-zero exit code
+        - Error message should indicate output path issue
+        - LLM should NOT be called (to avoid wasting money)
+
+        Bug behavior (Issue #154):
+        - Command exits with code 0
+        - File write is silently skipped
+        - Only a warning is printed to console
+        - LLM IS called (wasting money)
+        """
+        # Create a simple prompt file
+        prompt_file = project_dir / "prompts" / "test_issue_154.prompt"
+        prompt_file.write_text(
+            "% You are a Python engineer.\n"
+            "% Write a function that returns 42.\n"
+        )
+
+        monkeypatch.chdir(project_dir)
+        monkeypatch.setenv("PDD_FORCE_LOCAL", "1")
+        monkeypatch.setenv("OPENAI_API_KEY", "fake-openai-key-for-testing")
+
+        # Track if the LLM was called (it shouldn't be when the output path is invalid)
+        llm_called = False
+
+        def mock_local_generator_func(*args, **kwargs):
+            """Mock the local code generator to track if it was called."""
+            nonlocal llm_called
+            llm_called = True
+            return ("def answer():\n    return 42", 0.01, "mock-model")
+
+        def mock_construct_paths(*args, **kwargs):
+            """
+            Mock construct_paths to return a falsy output path.
+
+            This simulates the downstream effect: when generate_output_paths
+            returns {}, construct_paths returns output_file_paths with the
+            "output" key being None or an empty string.
+            """
+            return (
+                {},  # resolved_config
+                {"prompt_file": "test content"},  # input_files
+                {"output": None},  # output_file_paths - THE BUG TRIGGER
+                "python"  # language
+            )
+
+        with patch('pdd.code_generator_main.construct_paths', side_effect=mock_construct_paths):
+            with patch('pdd.code_generator_main.local_code_generator_func', side_effect=mock_local_generator_func):
+                from pdd import cli
+
+                runner = CliRunner()
+                result = runner.invoke(cli.cli, [
+                    "--local",
+                    "--force",
+                    "generate",
+                    str(prompt_file),
+                ], catch_exceptions=False)
+
+        # THE KEY ASSERTIONS - these FAIL on buggy code
+
+        # BUG CHECK 1: Exit code should be non-zero when output path resolution fails
+        # CURRENT BUG: Exit code is 0 (success) even though the file wasn't written
+        assert result.exit_code != 0, (
+            f"BUG DETECTED (Issue #154): Command exited with code 0 despite no file being written!\n\n"
+            f"When output_path cannot be resolved, the generate command should fail explicitly.\n"
+            f"Instead, it silently skipped the file write and exited successfully.\n\n"
+            f"Exit code: {result.exit_code}\n"
+            f"Output:\n{result.output}\n\n"
+            f"Expected: Non-zero exit code indicating failure\n"
+            f"Actual: Exit code 0 (success) with silent warning"
+        )
+
+        # BUG CHECK 2: LLM should NOT be called when the output path is invalid
+        # CURRENT BUG: LLM IS called, wasting money on code that won't be written
+        assert not llm_called, (
+            f"BUG DETECTED (Issue #154): LLM was called despite invalid output path!\n\n"
+            f"The output path should be validated BEFORE calling the expensive LLM.\n"
+            f"Currently, money is wasted generating code that will never be written.\n\n"
+            f"Expected: LLM not called when output path is invalid\n"
+            f"Actual: LLM was called"
+        )
+
+    def test_generate_fails_when_output_path_is_empty_string(
+        self, project_dir: Path, monkeypatch
+    ):
+        """
+        E2E Test: `pdd generate` should fail when the output path is an empty string.
+
+        Similar to the None case, but tests the empty string "" path.
+        This can happen when generate_output_paths returns {} and
+        output_file_paths.get("output", "") is used.
+        """
+        prompt_file = project_dir / "prompts" / "test_issue_154_empty.prompt"
+        prompt_file.write_text(
+            "% You are a Python engineer.\n"
+            "% Write a hello world function.\n"
+        )
+
+        monkeypatch.chdir(project_dir)
+        monkeypatch.setenv("PDD_FORCE_LOCAL", "1")
+        monkeypatch.setenv("OPENAI_API_KEY", "fake-openai-key-for-testing")
+
+        def mock_local_generator_func(*args, **kwargs):
+            return ("def hello():\n    print('Hello')", 0.01, "mock-model")
+
+        def mock_construct_paths(*args, **kwargs):
+            """Mock with an empty string output path."""
+            return (
+                {},
+                {"prompt_file": "test content"},
+                {"output": ""},  # Empty string - also triggers the bug
+                "python"
+            )
+
+        with patch('pdd.code_generator_main.construct_paths', side_effect=mock_construct_paths):
+            with patch('pdd.code_generator_main.local_code_generator_func', side_effect=mock_local_generator_func):
+                from pdd import cli
+
+                runner = CliRunner()
+                result = runner.invoke(cli.cli, [
+                    "--local",
+                    "--force",
+                    "generate",
+                    str(prompt_file),
+                ], catch_exceptions=False)
+
+        # Exit code should indicate failure
+        assert result.exit_code != 0, (
+            f"BUG DETECTED (Issue #154): Command succeeded with empty output path!\n\n"
+            f"Exit code: {result.exit_code}\n"
+            f"Output:\n{result.output}"
+        )
+
+    def test_generate_succeeds_when_output_path_is_valid(
+        self, project_dir: Path, monkeypatch
+    ):
+        """
+        Control test: verify generate works correctly when the output path is valid.
+
+        This ensures the fix doesn't break normal operation.
+        """
+        prompt_file = project_dir / "prompts" / "test_valid.prompt"
+        prompt_file.write_text(
+            "% You are a Python engineer.\n"
+            "% Write a simple function.\n"
+        )
+
+        output_file = project_dir / "src" / "test_valid.py"
+
+        monkeypatch.chdir(project_dir)
+        monkeypatch.setenv("PDD_FORCE_LOCAL", "1")
+        monkeypatch.setenv("OPENAI_API_KEY", "fake-openai-key-for-testing")
+
+        def mock_local_generator_func(*args, **kwargs):
+            return ("def simple():\n    return True", 0.01, "mock-model")
+
+        def mock_construct_paths(*args, **kwargs):
+            """Mock with a valid output path."""
+            return (
+                {},
+                {"prompt_file": "test content"},
+                {"output": str(output_file)},  # Valid path - should work
+                "python"
+            )
+
+        with patch('pdd.code_generator_main.construct_paths', side_effect=mock_construct_paths):
+            with patch('pdd.code_generator_main.local_code_generator_func', side_effect=mock_local_generator_func):
+                from pdd import cli
+
+                runner = CliRunner()
+                result = runner.invoke(cli.cli, [
+                    "--local",
+                    "--force",
+                    "generate",
+                    str(prompt_file),
+                ], catch_exceptions=False)
+
+        # With a valid output path, the command should succeed
+        assert result.exit_code == 0, (
+            f"Generate should succeed with valid output path.\n"
+            f"Exit code: {result.exit_code}\n"
+            f"Output:\n{result.output}"
+        )
+
+        # The file should be created
+        assert output_file.exists(), (
+            f"Output file should be created when path is valid.\n"
+            f"Expected: {output_file}"
+        )
+
+
+class TestIssue154OverwriteConfirmationE2E:
+    """
+    E2E tests verifying that overwrite confirmation is NOT shown when
+    the file write will be skipped anyway.
+
+    This tests the architectural issue: overwrite confirmation happens in
+    construct_paths, but the actual write happens later with additional
+    conditional gates. If the write will be skipped, confirmation is misleading.
+    """
+
+    @pytest.fixture
+    def project_dir(self, tmp_path: Path) -> Path:
+        """Create a minimal PDD project structure."""
+        (tmp_path / "prompts").mkdir()
+        (tmp_path / "src").mkdir()
+        return tmp_path
+
+    def test_no_overwrite_prompt_when_output_path_invalid(
+        self, project_dir: Path, monkeypatch
+    ):
+        """
+        E2E Test: the user should NOT be asked to confirm overwrite when the
+        write will be skipped anyway due to an invalid output path.
+
+        This tests the architectural disconnect between confirmation and write.
+
+        Bug behavior:
+        - User confirms overwrite (or uses --force)
+        - LLM generates code (costs money)
+        - Write is silently skipped due to invalid output path
+        - User was misled into thinking their file would be overwritten
+
+        Expected fix:
+        - Validate output path BEFORE showing overwrite confirmation
+        - Or raise an error immediately when the output path is invalid
+        """
+        # Create an existing file that would normally trigger the overwrite prompt
+        existing_file = project_dir / "src" / "existing.py"
+        existing_file.parent.mkdir(parents=True, exist_ok=True)
+        existing_file.write_text("# This file already exists\n")
+        original_content = existing_file.read_text()
+
+        prompt_file = project_dir / "prompts" / "test_overwrite.prompt"
+        prompt_file.write_text(
+            "% You are a Python engineer.\n"
+            "% Write a new implementation.\n"
+        )
+
+        monkeypatch.chdir(project_dir)
+        monkeypatch.setenv("PDD_FORCE_LOCAL", "1")
+        monkeypatch.setenv("OPENAI_API_KEY", "fake-openai-key-for-testing")
+
+        overwrite_confirmed = False
+
+        def mock_construct_paths(*args, **kwargs):
+            """
+            Mock construct_paths to:
+            1. Simulate that overwrite confirmation was shown/passed
+            2. Return an invalid output path (simulating the downstream bug)
+            """
+            nonlocal overwrite_confirmed
+            # In the real code, construct_paths handles overwrite confirmation.
+            # We simulate that it passed (user said yes or --force was used).
+            overwrite_confirmed = True
+            return (
+                {},
+                {"prompt_file": "test content"},
+                {"output": None},  # But the output path is invalid!
+                "python"
+            )
+
+        def mock_local_generator_func(*args, **kwargs):
+            return ("def new_impl():\n    return 'new'", 0.01, "mock-model")
+
+        with patch('pdd.code_generator_main.construct_paths', side_effect=mock_construct_paths):
+            with patch('pdd.code_generator_main.local_code_generator_func', side_effect=mock_local_generator_func):
+                from pdd import cli
+
+                runner = CliRunner()
+                result = runner.invoke(cli.cli, [
+                    "--local",
+                    "--force",  # Simulates the user confirming overwrite
+                    "generate",
+                    str(prompt_file),
+                ], catch_exceptions=False)
+
+        # The existing file should NOT have been modified
+        # (since the write was skipped due to the invalid output path)
+        assert existing_file.read_text() == original_content, (
+            "Existing file should not be modified when output path is invalid"
+        )
+
+        # But the command should have failed, not silently succeeded
+        assert result.exit_code != 0, (
+            f"BUG DETECTED (Issue #154): Command succeeded despite invalid output path!\n\n"
+            f"The user was asked to confirm overwrite (or used --force),\n"
+            f"but the file was never actually written.\n"
+            f"This is misleading - if the write will be skipped, don't ask for confirmation.\n\n"
+            f"Exit code: {result.exit_code}\n"
+            f"Output:\n{result.output}"
+        )
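
Note: the fix these tests encode amounts to a guard in pdd/code_generator_main.py
that runs before the LLM call. A minimal sketch, assuming the resolved paths are
available as output_file_paths and that click is already imported in that module;
the variable names and message text are illustrative and not part of this patch
series:

    # Validate the resolved output path up front, before any LLM call,
    # instead of silently skipping the write afterwards.
    output_path = output_file_paths.get("output")
    if not output_path:
        raise click.UsageError(
            "Could not resolve an output path for the generated code; "
            "check the prompt filename and output configuration."
        )

With a guard like this in place, the empty-string and None cases raise
click.UsageError before the generator runs, which is exactly what the unit and
E2E assertions above check for.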