diff --git a/amplifier_module_context_simple/__init__.py b/amplifier_module_context_simple/__init__.py index e064e0d..fc0ffdb 100644 --- a/amplifier_module_context_simple/__init__.py +++ b/amplifier_module_context_simple/__init__.py @@ -912,19 +912,35 @@ def _check_tool_pair_removable( assistant_msg: dict[str, Any], protected_indices: set[int], ) -> tuple[bool, list[int]]: - """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices).""" + """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices). + + CRITICAL FIX: This function now verifies that ALL tool_calls have corresponding tool_results. + If any tool_call is missing its result, we must NOT remove the assistant, because: + 1. The tool result might be added later (e.g., user types "continue" during execution) + 2. Removing the assistant would orphan that future tool result + 3. This causes: "tool_use ids were found without tool_result blocks" error + """ all_removable = True tool_result_indices = [] for tc in assistant_msg.get("tool_calls", []): tc_id = tc.get("id") or tc.get("tool_call_id") if tc_id: + found = False # Track if we found this specific result for k, m in enumerate(messages): if m.get("tool_call_id") == tc_id: + found = True # Mark as found if k in protected_indices: all_removable = False else: tool_result_indices.append(k) + + # CRITICAL: If we didn't find a result for this tool_call, + # we CANNOT remove the assistant safely. The tool result might + # be added later, and removing the assistant now would create + # an orphaned tool_result with no matching tool_use. + if not found: + all_removable = False return all_removable, tool_result_indices diff --git a/tests/test_tool_pair_compaction.py b/tests/test_tool_pair_compaction.py index 2c2155d..9cf6787 100644 --- a/tests/test_tool_pair_compaction.py +++ b/tests/test_tool_pair_compaction.py @@ -281,3 +281,111 @@ async def test_compact_with_multiple_tool_calls_in_one_message(): f"Expected 6 tool results after assistant with 6 tool_calls, " f"but found only {tool_result_count}. This is the bug!" ) + + +@pytest.mark.asyncio +async def test_incomplete_tool_results_not_removed(): + """Test that assistant with incomplete tool results is NOT removed during compaction. + + Regression test for bug where compaction would remove assistant messages + even when not all tool_results had been added yet, causing orphaned + tool_results and API errors. + """ + context = SimpleContextManager( + max_tokens=1000, + compact_threshold=0.01, # Force compaction + target_usage=0.5, + ) + + # Scenario: Assistant makes 2 tool calls, but only 1 result added so far + await context.add_message({ + "role": "assistant", + "content": "Let me check both", + "tool_calls": [ + {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}}, + ] + }) + + # Only result A added + await context.add_message({ + "role": "tool", + "tool_call_id": "call_A", + "content": "Result from tool A" + }) + + # User interrupts (like typing "continue") + await context.add_message({ + "role": "user", + "content": "continue" + }) + + # Result B hasn't been added yet! + + # Force compaction + compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages) + + # CRITICAL: Assistant message should still be present + # because call_B is missing its result + assistant_messages = [m for m in compacted if m.get("role") == "assistant" and m.get("tool_calls")] + + assert len(assistant_messages) == 1, "Assistant with incomplete tool results should NOT be removed" + + # Verify the assistant message has both tool_calls + assert len(assistant_messages[0]["tool_calls"]) == 2 + + +@pytest.mark.asyncio +async def test_complete_tool_results_can_be_removed(): + """Test that assistant with ALL tool results CAN be removed during compaction.""" + context = SimpleContextManager( + max_tokens=1000, + compact_threshold=0.01, # Force compaction + target_usage=0.5, + ) + + # Scenario: Assistant makes 2 tool calls, BOTH results added + await context.add_message({ + "role": "assistant", + "content": "Let me check both", + "tool_calls": [ + {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}}, + ] + }) + + # Both results added + await context.add_message({ + "role": "tool", + "tool_call_id": "call_A", + "content": "Result A" + }) + await context.add_message({ + "role": "tool", + "tool_call_id": "call_B", + "content": "Result B" + }) + + # Add more messages to trigger compaction + for i in range(20): + await context.add_message({"role": "user", "content": f"Message {i}" * 100}) + await context.add_message({"role": "assistant", "content": f"Response {i}" * 100}) + + # Force compaction - should be able to remove the old tool pair + compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages) + + # The old assistant with tool_calls might be removed (that's OK now) + # We just verify no orphaned tool_results exist + tool_use_ids = set() + for msg in compacted: + if msg.get("tool_calls"): + for tc in msg["tool_calls"]: + tc_id = tc.get("id") or tc.get("tool_call_id") + if tc_id: + tool_use_ids.add(tc_id) + + # Check all tool results have matching tool_uses + for msg in compacted: + if msg.get("role") == "tool": + tc_id = msg.get("tool_call_id") + assert tc_id in tool_use_ids, f"Orphaned tool_result with tool_call_id: {tc_id}"