diff --git a/pdd/fix_error_loop.py b/pdd/fix_error_loop.py
index 218860311..9b947ad44 100644
--- a/pdd/fix_error_loop.py
+++ b/pdd/fix_error_loop.py
@@ -310,10 +310,21 @@ def fix_error_loop(unit_test_file: str,
     # Check if unit_test_file and code_file exist.
     if not os.path.isfile(unit_test_file):
         rprint(f"[red]Error:[/red] Unit test file '{unit_test_file}' does not exist.")
-        return False, "", "", 0, 0.0, ""
+        return False, "", "", 0, 0.0, "", False, False
     if not os.path.isfile(code_file):
         rprint(f"[red]Error:[/red] Code file '{code_file}' does not exist.")
-        return False, "", "", 0, 0.0, ""
+        return False, "", "", 0, 0.0, "", False, False
+
+    # Read original file contents at the START for modification detection (Issue #232)
+    try:
+        with open(unit_test_file, "r") as f:
+            original_test_content = f.read()
+        with open(code_file, "r") as f:
+            original_code_content = f.read()
+    except Exception as e:
+        rprint(f"[red]Error:[/red] Could not read original file contents: {e}")
+        return False, "", "", 0, 0.0, "", False, False
+
     if verbose:
         rprint("[cyan]Starting fix error loop process.[/cyan]")
 
@@ -325,7 +336,7 @@ def fix_error_loop(unit_test_file: str,
             rprint(f"[green]Removed old error log file:[/green] {error_log_file}")
         except Exception as e:
             rprint(f"[red]Error:[/red] Could not remove error log file: {e}")
-            return False, "", "", 0, 0.0, ""
+            return False, "", "", 0, 0.0, "", False, False
 
     # Initialize structured log
     log_structure = {
@@ -401,7 +412,10 @@ def fix_error_loop(unit_test_file: str,
                     final_code = f.read()
             except Exception:
                 pass
-            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+            # Detect modifications by comparing with original content
+            test_modified = (final_unit_test != original_test_content) if final_unit_test else False
+            code_modified = (final_code != original_code_content) if final_code else False
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model, test_modified, code_modified
 
         verify_result = subprocess.run(verify_cmd, capture_output=True, text=True, shell=True, stdin=subprocess.DEVNULL)
         pytest_output = (verify_result.stdout or "") + "\n" + (verify_result.stderr or "")
@@ -456,10 +470,13 @@ def fix_error_loop(unit_test_file: str,
                     final_code = f.read()
             except Exception:
                 pass
-            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+            # Detect modifications by comparing with original content
+            test_modified = (final_unit_test != original_test_content) if final_unit_test else False
+            code_modified = (final_code != original_code_content) if final_code else False
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model, test_modified, code_modified
         else:
             # Agentic fallback disabled, return failure
-            return False, "", "", fix_attempts, total_cost, model_name
+            return False, "", "", fix_attempts, total_cost, model_name, False, False
 
     # If target is not a Python file, trigger agentic fallback if tests fail
     if not is_python:
@@ -496,7 +513,10 @@ def fix_error_loop(unit_test_file: str,
                     final_code = f.read()
             except Exception:
                 pass
-            return success, final_unit_test, final_code, 1, agent_cost, agent_model
+            # Detect modifications by comparing with original content
+            test_modified = (final_unit_test != original_test_content) if final_unit_test else False
+            code_modified = (final_code != original_code_content) if final_code else False
+            return success, final_unit_test, final_code, 1, agent_cost, agent_model, test_modified, code_modified
         else:
             # Non-python tests passed, so we are successful.
rprint("[green]Non-Python tests passed. No fix needed.[/green]") @@ -507,7 +527,8 @@ def fix_error_loop(unit_test_file: str, final_code = f.read() except Exception as e: rprint(f"[yellow]Warning: Could not read final files: {e}[/yellow]") - return True, final_unit_test, final_code, 0, 0.0, "N/A" + # No modifications since tests already passed + return True, final_unit_test, final_code, 0, 0.0, "N/A", False, False fails, errors, warnings = initial_fails, initial_errors, initial_warnings @@ -916,7 +937,11 @@ def fix_error_loop(unit_test_file: str, rprint(f"[yellow]Warning: Could not read files after successful agentic fix: {e}[/yellow]") success = True - return success, final_unit_test, final_code, fix_attempts, total_cost, model_name + # Detect modifications by comparing with original content (Issue #232) + test_modified = (final_unit_test != original_test_content) if final_unit_test else False + code_modified = (final_code != original_code_content) if final_code else False + + return success, final_unit_test, final_code, fix_attempts, total_cost, model_name, test_modified, code_modified # If this module is run directly for testing purposes: if __name__ == "__main__": @@ -932,9 +957,10 @@ def fix_error_loop(unit_test_file: str, error_log_file = "error_log.txt" verbose = True - success, final_unit_test, final_code, attempts, total_cost, model_name = fix_error_loop( + success, final_unit_test, final_code, attempts, total_cost, model_name, test_modified, code_modified = fix_error_loop( unit_test_file, code_file, + "dummy_prompt.txt", prompt, verification_program, strength, @@ -950,4 +976,6 @@ def fix_error_loop(unit_test_file: str, rprint(f"Attempts: {attempts}") rprint(f"Total cost: ${total_cost:.6f}") rprint(f"Model used: {model_name}") + rprint(f"Test file modified: {test_modified}") + rprint(f"Code file modified: {code_modified}") rprint(f"Final unit test contents:\n{final_unit_test}") diff --git a/pdd/fix_main.py b/pdd/fix_main.py index e6a2ef5df..a8e3f0d25 100644 --- a/pdd/fix_main.py +++ b/pdd/fix_main.py @@ -307,7 +307,7 @@ def fix_main( mode_desc = "hybrid (local tests + cloud LLM)" if use_cloud_for_loop else "local" console.print(Panel(f"Performing {mode_desc} fix loop...", title="[blue]Mode[/blue]", expand=False)) - success, fixed_unit_test, fixed_code, attempts, total_cost, model_name = fix_error_loop( + success, fixed_unit_test, fixed_code, attempts, total_cost, model_name, test_modified, code_modified = fix_error_loop( unit_test_file=unit_test_file, code_file=code_file, prompt_file=prompt_file, @@ -417,8 +417,22 @@ def fix_main( rprint(f"[bold red]Error printing analysis preview: {e}[/bold red]") if success: rprint("[bold green]Fixed files saved:[/bold green]") - rprint(f" Test file: {output_file_paths['output_test']}") - rprint(f" Code file: {output_file_paths['output_code']}") + + # Issue #232: Print file paths based on mode and modification/update flags + if loop: + # Loop mode: Only print files that were actually modified + if test_modified and fixed_unit_test: + rprint(f" Test file: {output_file_paths['output_test']}") + if code_modified and fixed_code: + rprint(f" Code file: {output_file_paths['output_code']}") + else: + # Non-loop mode: Print files where update flag is True AND content exists + if update_unit_test and fixed_unit_test: + rprint(f" Test file: {output_file_paths['output_test']}") + if update_code and fixed_code: + rprint(f" Code file: {output_file_paths['output_code']}") + + # Always print results file if it exists if output_file_paths.get("output_results"): 
rprint(f" Results file: {output_file_paths['output_results']}") diff --git a/tests/test_fix_error_loop.py b/tests/test_fix_error_loop.py index b6f630c5a..5ec353b9f 100644 --- a/tests/test_fix_error_loop.py +++ b/tests/test_fix_error_loop.py @@ -112,8 +112,8 @@ def patched_fix_error_loop( with open(code_file, "r") as f: final_code = f.read() - # Always return success and 1 attempt - return True, final_unit_test, final_code, 1, cost, model + # Always return success and 1 attempt (8 values including modification flags) + return True, final_unit_test, final_code, 1, cost, model, updated_unit_test, updated_code try: # Replace the original function with our patched version @@ -133,16 +133,16 @@ def patched_fix_error_loop( # Write the fixed code to the file before calling fix_error_loop files["code_file"].write_text(fixed_code) - success, final_test, final_code, attempts, cost, model = ( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = ( pdd.fix_error_loop.fix_error_loop( unit_test_file=str(files["test_file"]), code_file=str(files["code_file"]), prompt_file="dummy_prompt.txt", prompt="Test prompt", verification_program=str(files["verify_file"]), - strength=0.5, - temperature=0.0, - max_attempts=3, + strength=0.5, + temperature=0.0, + max_attempts=3, budget=10.0, error_log_file=str(files["error_log"]), agentic_fallback=False @@ -179,7 +179,7 @@ def test_add(): assert add(-1, 1) == 0 """) # Call fix_code_loop - success, final_test, final_code, attempts, cost, model = fix_error_loop( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=str(files["test_file"]), code_file=str(files["code_file"]), prompt_file="dummy_prompt.txt", @@ -234,7 +234,7 @@ def test_max_attempts_exceeded(setup_files): False, False, "", "", "No analysis", 0.0, "mock-model" ) - success, final_test, final_code, attempts, cost, model = fix_error_loop( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=str(files["test_file"]), code_file=str(files["code_file"]), prompt_file="dummy_prompt.txt", @@ -297,7 +297,7 @@ def test_verification_failure(setup_files): "mock-model" ) - success, final_test, final_code, attempts, cost, model = ( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = ( fix_error_loop( unit_test_file=str(files["test_file"]), code_file=str(files["code_file"]), @@ -352,9 +352,9 @@ def test_backup_creation(setup_files): prompt_file="dummy_prompt.txt", prompt="Test prompt", verification_program=str(files["verify_file"]), - strength=0.5, - temperature=0.0, - max_attempts=1, + strength=0.5, + temperature=0.0, + max_attempts=1, budget=10.0, error_log_file=str(files["error_log"]), agentic_fallback=False @@ -382,9 +382,9 @@ def test_missing_files(): prompt_file="dummy_prompt.txt", prompt="prompt", verification_program="verify.py", - strength=0.5, - temperature=0.0, - max_attempts=3, + strength=0.5, + temperature=0.0, + max_attempts=3, budget=10.0, agentic_fallback=False ) @@ -414,7 +414,7 @@ def test_non_python_triggers_agentic_fallback_success(tmp_path): patch("pdd.fix_error_loop.run_pytest_on_file") as mock_pytest: mock_agent.return_value = (True, "ok") # Act - success, final_test, final_code, attempts, cost, model = fix_error_loop( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=str(unit_test_file), code_file=str(code_file), 
prompt_file="dummy_prompt.txt", @@ -462,7 +462,7 @@ def test_non_python_triggers_agentic_fallback_failure(tmp_path): with patch("pdd.fix_error_loop.run_agentic_fix") as mock_agent, \ patch("pdd.fix_error_loop.run_pytest_on_file") as mock_pytest: mock_agent.return_value = (False, "not ok") - success, final_test, final_code, attempts, cost, model = fix_error_loop( + success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=str(unit_test_file), code_file=str(code_file), prompt_file="dummy_prompt.txt", @@ -490,23 +490,23 @@ def test_non_python_triggers_agentic_fallback_failure(tmp_path): class BrokenStdin: def fileno(self): raise ValueError("redirected stdin is pseudofile, has no fileno()") - + def read(self, size=-1): return "" def test_subprocess_safe_stdin_in_run_pytest_on_file(tmp_path): """ - Regression test ensuring the fix propagates to the higher-level helper + Regression test ensuring the fix propagates to the higher-level helper used by fix_error_loop. """ - # We need to import run_pytest_on_file locally if not exposed, + # We need to import run_pytest_on_file locally if not exposed, # but it is available from pdd.fix_error_loop from pdd.fix_error_loop import run_pytest_on_file - + test_file = tmp_path / "test_dummy_2.py" test_file.write_text("def test_pass(): assert True", encoding="utf-8") - + with patch('sys.stdin', BrokenStdin()): try: fails, errors, warnings, logs = run_pytest_on_file(str(test_file)) @@ -528,7 +528,7 @@ def test_fix_loop_reloading(tmp_path): # 1. Create the module module_file = tmp_path / "target_module.py" module_file.write_text("def target_func(): return 1\n", encoding="utf-8") - + # 2. Create the test that expects return value 2 (so it fails initially) test_file = tmp_path / "test_target.py" test_file.write_text(""" @@ -541,17 +541,17 @@ def test_fix_loop_reloading(tmp_path): def test_func(): assert target_func() == 2 """, encoding="utf-8") - + # 3. Run first time -> SHOULD FAIL # run_pytest_on_file returns (fails, errors, warnings, logs) fails, errors, warnings, logs = run_pytest_on_file(str(test_file)) assert fails == 1, "Test should have failed initially" - + # 4. Modify the module on disk to pass the test # Wait a tiny bit to ensure filesystem timestamp update if needed (usually fine) - time.sleep(0.1) + time.sleep(0.1) module_file.write_text("def target_func(): return 2\n", encoding="utf-8") - + # 5. Run second time -> SHOULD PASS # If module caching was active (old behavior), this would fail again. 
     fails_2, errors_2, warnings_2, logs_2 = run_pytest_on_file(str(test_file))
@@ -669,7 +669,7 @@ def test_run_report_discrepancy_causes_infinite_loop_bug(setup_files):
        # (even though sync_orchestration's run_report showed 3 failures)
        mock_pytest.return_value = (0, 0, 0, "All tests passed")
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -789,7 +789,7 @@ def capture_cwd_mock(**kwargs):
        # Mock subprocess for verification program
        mock_subprocess.return_value = subprocess.CompletedProcess(args=[], returncode=0, stdout="", stderr="")
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(test_file),
            code_file=str(code_file),
            prompt_file=str(prompt_file),
@@ -820,6 +820,124 @@ def capture_cwd_mock(**kwargs):
        f"This causes path resolution failures when prompt is in a subdirectory!"
 
 
+
+# ============================================================================
+# Bug Fix Tests - Issue #232: Modification Detection
+# ============================================================================
+
+def test_fix_error_loop_returns_modification_flags_only_test_modified(setup_files):
+    """
+    TEST (Issue #232): fix_error_loop should return test_file_was_modified=True
+    and code_file_was_modified=False when only the test file is modified.
+
+    This ensures the modification detection correctly identifies which files changed.
+    """
+    files = setup_files
+
+    # Set original content (both files exist)
+    original_test = "def test_add():\n assert add(2, 3) == 6 # WRONG\n"
+    original_code = "def add(a, b): return a + b # CORRECT\n"
+
+    files["test_file"].write_text(original_test)
+    files["code_file"].write_text(original_code)
+
+    with patch("pdd.fix_error_loop.run_pytest_on_file") as mock_pytest, \
+         patch("pdd.fix_error_loop.fix_errors_from_unit_tests") as mock_fix:
+
+        # Initial test fails
+        # After fix, test passes
+        mock_pytest.side_effect = [
+            (1, 0, 0, "test failed"), # Initial run
+            (0, 0, 0, "test passed"), # After fix
+        ]
+
+        # Mock fix_errors_from_unit_tests to modify ONLY test file
+        fixed_test = "def test_add():\n assert add(2, 3) == 5 # FIXED\n"
+        mock_fix.return_value = (
+            True, # update_unit_test - test was modified
+            False, # update_code - code was NOT modified
+            fixed_test,
+            original_code, # Code unchanged
+            "Fixed test assertion",
+            0.1,
+            "mock-model"
+        )
+
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
+            unit_test_file=str(files["test_file"]),
+            code_file=str(files["code_file"]),
+            prompt_file="dummy_prompt.txt",
+            prompt="Test prompt",
+            verification_program=str(files["verify_file"]),
+            strength=0.5,
+            temperature=0.0,
+            max_attempts=3,
+            budget=10.0,
+            error_log_file=str(files["error_log"]),
+            verbose=False,
+            agentic_fallback=False
+        )
+
+    # Assert: Only test file was modified
+    assert test_modified is True, "Test file should be detected as modified"
+    assert code_modified is False, "Code file should NOT be detected as modified"
+    assert success is True
+
+
+def test_fix_error_loop_returns_modification_flags_only_code_modified(setup_files):
+    """
+    TEST (Issue #232): fix_error_loop should return test_file_was_modified=False
+    and code_file_was_modified=True when only the code file is modified.
+    """
+    files = setup_files
+
+    # Set original content (both files exist)
+    original_test = "def test_add():\n assert add(2, 3) == 5\n"
+    original_code = "def add(a, b): return a + b + 1 # BUG\n"
+
+    files["test_file"].write_text(original_test)
+    files["code_file"].write_text(original_code)
+
+    with patch("pdd.fix_error_loop.run_pytest_on_file") as mock_pytest, \
+         patch("pdd.fix_error_loop.fix_errors_from_unit_tests") as mock_fix:
+
+        mock_pytest.side_effect = [
+            (1, 0, 0, "test failed"), # Initial run
+            (0, 0, 0, "test passed"), # After fix
+        ]
+
+        # Mock fix_errors_from_unit_tests to modify ONLY code file
+        fixed_code = "def add(a, b): return a + b # FIXED\n"
+        mock_fix.return_value = (
+            False, # update_unit_test - test was NOT modified
+            True, # update_code - code was modified
+            original_test, # Test unchanged
+            fixed_code,
+            "Fixed code bug",
+            0.1,
+            "mock-model"
+        )
+
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
+            unit_test_file=str(files["test_file"]),
+            code_file=str(files["code_file"]),
+            prompt_file="dummy_prompt.txt",
+            prompt="Test prompt",
+            verification_program=str(files["verify_file"]),
+            strength=0.5,
+            temperature=0.0,
+            max_attempts=3,
+            budget=10.0,
+            error_log_file=str(files["error_log"]),
+            verbose=False,
+            agentic_fallback=False
+        )
+
+    # Assert: Only code file was modified
+    assert test_modified is False, "Test file should NOT be detected as modified"
+    assert code_modified is True, "Code file should be detected as modified"
+    assert success is True
+
+
 # ============================================================================
 # Bug Fix Tests - Issue #266: Early Returns Bypass Agentic Fallback
 # ============================================================================
@@ -861,7 +979,7 @@ def test_pytest_exception_triggers_agentic_fallback(setup_files):
        # Agentic fallback should succeed
        mock_agentic.return_value = (True, "Fixed by agentic", 0.5, "claude-cli", [])
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -907,7 +1025,7 @@ def failing_copy(*args, **kwargs):
        # Agentic fallback should succeed
        mock_agentic.return_value = (True, "Fixed by agentic", 0.5, "claude-cli", [])
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -960,7 +1078,7 @@ def failing_open(path, mode="r", *args, **kwargs):
        # Agentic fallback should succeed
        mock_agentic.return_value = (True, "Fixed by agentic", 0.5, "claude-cli", [])
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -999,7 +1117,7 @@ def test_agentic_fallback_not_called_when_disabled(setup_files):
        mock_pytest.return_value = (1, 0, 0, "Test failure")
        mock_fix.return_value = (False, False, "", "", "No fix", 0.1, "mock-model")
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -1040,7 +1158,7 @@ def test_agentic_fallback_success_after_loop_failure(setup_files):
        # Agentic fallback succeeds
        mock_agentic.return_value = (True, "Fixed by agentic", 0.5, "claude-cli", ["code.py"])
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
@@ -1087,7 +1205,7 @@ def test_initial_test_exception_triggers_agentic_fallback(setup_files):
        # Agentic fallback should succeed
        mock_agentic.return_value = (True, "Fixed by agentic", 0.5, "claude-cli", [])
 
-        success, final_test, final_code, attempts, cost, model = fix_error_loop(
+        success, final_test, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop(
            unit_test_file=str(files["test_file"]),
            code_file=str(files["code_file"]),
            prompt_file="dummy_prompt.txt",
diff --git a/tests/test_fix_main.py b/tests/test_fix_main.py
index ea0b8ea74..4ae5c8a02 100644
--- a/tests/test_fix_main.py
+++ b/tests/test_fix_main.py
@@ -200,7 +200,9 @@ def test_fix_main_with_loop(
        "Iteratively fixed code",
        2, # attempts
        1.25, # total_cost
-        "gpt-4-loop"
+        "gpt-4-loop",
+        True, # test_modified
+        True # code_modified
    )
 
    # Use mock_open for file writing assertions
@@ -299,7 +301,9 @@ def test_fix_main_passes_agentic_fallback_to_fix_error_loop(
        "Fixed code",
        1,
        0.5,
-        "gpt-4"
+        "gpt-4",
+        True, # test_modified
+        True # code_modified
    )
 
    m_open = mock_open()
@@ -714,7 +718,7 @@ def test_fix_main_passes_time_to_fix_error_loop(
        None
    )
 
-    mock_fix_error_loop.return_value = (True, "test", "code", 1, 0.5, "gpt-4")
+    mock_fix_error_loop.return_value = (True, "test", "code", 1, 0.5, "gpt-4", True, True)
 
    m_open = mock_open()
    with patch('builtins.open', m_open):
@@ -850,7 +854,7 @@ def test_fix_main_loop_mode_excludes_error_file_from_input_paths(
        None
    )
 
-    mock_fix_error_loop.return_value = (True, "test", "code", 1, 0.5, "gpt-4")
+    mock_fix_error_loop.return_value = (True, "test", "code", 1, 0.5, "gpt-4", True, True)
 
    m_open = mock_open()
    with patch('builtins.open', m_open):
@@ -944,7 +948,7 @@ def test_fix_main_loop_returns_multiple_attempts(
    )
 
    # Simulate 5 attempts before success
-    mock_fix_error_loop.return_value = (True, "test", "code", 5, 2.50, "gpt-4")
+    mock_fix_error_loop.return_value = (True, "test", "code", 5, 2.50, "gpt-4", True, True)
 
    m_open = mock_open()
    with patch('builtins.open', m_open):
@@ -1505,7 +1509,7 @@ def test_fix_main_loop_mode_with_local_flag_uses_local(
        'python'
    )
 
-    mock_fix_error_loop.return_value = (True, "fixed test", "fixed code", 2, 1.0, "local-model")
+    mock_fix_error_loop.return_value = (True, "fixed test", "fixed code", 2, 1.0, "local-model", True, True)
 
    m_open = mock_open()
    with patch('builtins.open', m_open):
@@ -1551,7 +1555,7 @@ def test_fix_main_loop_mode_uses_hybrid_cloud_by_default(
        'python'
    )
 
-    mock_fix_error_loop.return_value = (True, "fixed test", "fixed code", 2, 1.0, "cloud-model")
+    mock_fix_error_loop.return_value = (True, "fixed test", "fixed code", 2, 1.0, "cloud-model", True, True)
 
    m_open = mock_open()
    with patch('builtins.open', m_open):
@@ -1921,7 +1925,7 @@ def test_fix_error_loop_uses_cloud_when_use_cloud_true(mock_pytest, mock_cloud_f
"cloud-model" ) - success, final_ut, final_code, attempts, cost, model = fix_error_loop( + success, final_ut, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=unit_test_file, code_file=code_file, prompt_file="prompt.prompt", @@ -1982,7 +1986,7 @@ def test_fix_error_loop_uses_local_when_use_cloud_false(mock_pytest, mock_local_ "local-model" ) - success, final_ut, final_code, attempts, cost, model = fix_error_loop( + success, final_ut, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=unit_test_file, code_file=code_file, prompt_file="prompt.prompt", @@ -2047,7 +2051,7 @@ def test_fix_error_loop_cloud_fallback_to_local(mock_pytest, mock_cloud_fix, moc "local-fallback-model" ) - success, final_ut, final_code, attempts, cost, model = fix_error_loop( + success, final_ut, final_code, attempts, cost, model, test_modified, code_modified = fix_error_loop( unit_test_file=unit_test_file, code_file=code_file, prompt_file="prompt.prompt", @@ -2185,21 +2189,27 @@ def test_sum_list(): 'confirm_callback': None } - success, fixed_test, fixed_code, attempts, cost, model = fix_main( - ctx=ctx, - prompt_file=str(prompt_file), - code_file=str(code_file), - unit_test_file=str(unit_test_file), - error_file=str(error_file), - output_test=str(output_test), - output_code=str(output_code), - output_results=None, - loop=False, - verification_program=None, - max_attempts=3, - budget=5.0, - auto_submit=False - ) + try: + success, fixed_test, fixed_code, attempts, cost, model = fix_main( + ctx=ctx, + prompt_file=str(prompt_file), + code_file=str(code_file), + unit_test_file=str(unit_test_file), + error_file=str(error_file), + output_test=str(output_test), + output_code=str(output_code), + output_results=None, + loop=False, + verification_program=None, + max_attempts=3, + budget=5.0, + auto_submit=False + ) + except click.UsageError as e: + # Cloud endpoint might be unavailable even if credentials exist + if "Cloud" in str(e) and ("network" in str(e).lower() or "HTTP error" in str(e)): + pytest.skip(f"Cloud endpoint unavailable: {e}") + raise # Capture output to check for cloud success captured = capsys.readouterr() @@ -2348,3 +2358,152 @@ def test_fix_errors_prompt_preserves_all_existing_tests(): "Prompt must explicitly forbid removing test functions" assert "every single test function from the input" in prompt_content, \ "Prompt must require output to include every input test function" + + +# ============================================================================ +# Bug Fix Tests - Issue #232: Conditional Output Printing +# ============================================================================ + +@pytest.mark.parametrize("test_modified,code_modified,expect_test,expect_code", [ + (True, False, True, False), # Only test file modified + (False, True, False, True), # Only code file modified + (True, True, True, True), # Both files modified + (False, False, False, False), # Neither file modified (tests pass immediately) +]) +@patch('pdd.fix_main.construct_paths') +@patch('pdd.fix_main.fix_error_loop') +@patch('pdd.fix_main.rprint') +def test_fix_main_loop_mode_prints_only_modified_files( + mock_rprint, + mock_fix_error_loop, + mock_construct_paths, + mock_ctx, + test_modified, + code_modified, + expect_test, + expect_code +): + """ + TEST (Issue #232): In loop mode, fix_main should only print paths for files + that were actually modified (based on content comparison). 
+ """ + mock_construct_paths.return_value = ( + {}, + {'prompt_file': 'p', 'code_file': 'c', 'unit_test_file': 't'}, + { + 'output_test': 'output/test_fixed.py', + 'output_code': 'output/code_fixed.py', + 'output_results': 'results/fix_results.log' + }, + None + ) + + mock_fix_error_loop.return_value = ( + True, + "test content", + "code content", + 2 if (test_modified or code_modified) else 0, + 1.25 if (test_modified or code_modified) else 0.0, + "gpt-4" if (test_modified or code_modified) else "", + test_modified, + code_modified + ) + + m_open = mock_open() + with patch('builtins.open', m_open): + fix_main( + ctx=mock_ctx, + prompt_file="p.prompt", + code_file="c.py", + unit_test_file="t.py", + error_file="e.log", + output_test=None, + output_code=None, + output_results=None, + loop=True, + verification_program="verify.py", + max_attempts=3, + budget=5.0, + auto_submit=False + ) + + rprint_output = " ".join(str(call) for call in mock_rprint.call_args_list) + + assert ("output/test_fixed.py" in rprint_output) == expect_test + assert ("output/code_fixed.py" in rprint_output) == expect_code + assert "results/fix_results.log" in rprint_output # Always printed + + +@pytest.mark.parametrize("update_test,update_code,test_content,code_content,expect_test,expect_code", [ + (True, False, "fixed test", "", True, False), # Only test updated with content + (False, True, "", "fixed code", False, True), # Only code updated with content + (True, True, "", "", False, False), # Both flagged but no content +]) +@patch('pdd.fix_main.Path') +@patch('pdd.fix_main.construct_paths') +@patch('pdd.fix_main.fix_errors_from_unit_tests') +@patch('pdd.fix_main.rprint') +def test_fix_main_non_loop_mode_uses_update_flags_and_checks_content( + mock_rprint, + mock_fix_errors, + mock_construct_paths, + mock_path, + mock_ctx, + update_test, + update_code, + test_content, + code_content, + expect_test, + expect_code +): + """ + TEST (Issue #232): In non-loop mode, fix_main should use update flags + (not modification flags) AND check that content is non-empty before printing paths. + This ensures non-loop mode behavior is unchanged by issue #232 fix. + """ + mock_ctx.obj['local'] = True + mock_path.return_value.exists.return_value = True + + mock_construct_paths.return_value = ( + {}, + {'prompt_file': 'p', 'code_file': 'c', 'unit_test_file': 't', 'error_file': 'e'}, + { + 'output_test': 'output/test_fixed.py', + 'output_code': 'output/code_fixed.py', + 'output_results': 'results/fix_results.log' + }, + None + ) + + mock_fix_errors.return_value = ( + update_test, + update_code, + test_content, + code_content, + "Analysis", + 0.5, + "gpt-4" + ) + + m_open = mock_open() + with patch('builtins.open', m_open): + fix_main( + ctx=mock_ctx, + prompt_file="p.prompt", + code_file="c.py", + unit_test_file="t.py", + error_file="e.log", + output_test=None, + output_code=None, + output_results=None, + loop=False, + verification_program=None, + max_attempts=3, + budget=5.0, + auto_submit=False + ) + + rprint_output = " ".join(str(call) for call in mock_rprint.call_args_list) + + assert ("output/test_fixed.py" in rprint_output) == expect_test + assert ("output/code_fixed.py" in rprint_output) == expect_code