Recurse-ML · ab-10 · Jan 27, 2026
diff --git a/src/noot/cache.py b/src/noot/cache.py
@@ -114,9 +114,10 @@ def from_env(cls, cassette_path: Path | None = None) -> "Cache":
         cassette_exists = cassette_path and cassette_path.exists()
 
         if mode == RecordMode.ALL:
-            # Always record, clear any existing entries
+            # Always record, but preserve entries from other tests
             cache._should_record = True
-            cache._entries = []
+            if cassette_exists:
+                cache._load()  # Load existing entries to preserve other tests' entries
         elif mode == RecordMode.NONE:
             # Replay only
             cache._should_record = False
@@ -202,6 +203,9 @@ def put(
         """
         Store a response in the cache.
 
+        If an entry with the same instruction and method already exists,
+        it will be replaced (to support re-recording).
+
         Args:
             instruction: The instruction sent to the LLM
             screen: The screen content at time of request
@@ -212,15 +216,22 @@ def put(
         if not self._should_record:
             return
 
-        self._entries.append(
-            CacheEntry(
-                instruction=instruction,
-                screen=screen,
-                response=response,
-                method=method,
-                assertion_code=assertion_code,
-            )
+        new_entry = CacheEntry(
+            instruction=instruction,
+            screen=screen,
+            response=response,
+            method=method,
+            assertion_code=assertion_code,
         )
+
+        # Replace existing entry with same instruction/method, or append if new
+        for i, entry in enumerate(self._entries):
+            if entry.instruction == instruction and entry.method == method:
+                self._entries[i] = new_entry
+                self.save()
+                return
+
+        self._entries.append(new_entry)
         self.save()
 
     @property

diff --git a/tests/test_cache.py b/tests/test_cache.py
@@ -0,0 +1,210 @@
+"""Unit tests for the Cache class."""
+
+import json
+import os
+
+import pytest
+
+from noot.cache import Cache
+
+
+class TestCacheMultipleInstances:
+    """Test that multiple cache instances sharing the same cassette preserve entries."""
+
+    def test_multiple_cache_instances_preserve_entries_in_all_mode(self, tmp_path):
+        """
+        Bug fix test: Multiple cache instances sharing the same default cassette
+        should preserve each other's entries when using RECORD_MODE=all.
+
+        This reproduces the bug reported where:
+        1. test_first records its entries to default.json
+        2. test_second creates a new Cache, overwrites default.json
+        3. test_first's entries are lost
+
+        The fix ensures entries are loaded before recording in ALL mode.
+        """
+        cassette_path = tmp_path / "default.json"
+
+        # Simulate test_first: Create first cache instance and record entries
+        os.environ["RECORD_MODE"] = "all"
+        cache1 = Cache.from_env(cassette_path)
+        cache1.put(
+            instruction="shows version output",
+            screen="$ gt version\nv1.0.0",
+            response="assert 'v1.0.0' in screen",
+            method="expect",
+            assertion_code="assert 'v1.0.0' in screen",
+        )
+
+        # Verify first entry was saved
+        data = json.loads(cassette_path.read_text())
+        assert len(data["entries"]) == 1
+        assert data["entries"][0]["instruction"] == "shows version output"
+
+        # Simulate test_second: Create second cache instance and record entries
+        cache2 = Cache.from_env(cassette_path)
+        cache2.put(
+            instruction="shows help output",
+            screen="$ gt --help\nUsage: gt [OPTIONS]",
+            response="assert 'Usage' in screen",
+            method="expect",
+            assertion_code="assert 'Usage' in screen",
+        )
+
+        # Verify BOTH entries are preserved
+        # (this was the bug - only second entry existed before fix)
+        data = json.loads(cassette_path.read_text())
+        assert len(data["entries"]) == 2, (
+            f"Expected 2 entries but got {len(data['entries'])}. "
+            "First test's entries were lost!"
+        )
+
+        instructions = [e["instruction"] for e in data["entries"]]
+        assert "shows version output" in instructions, "test_first's entry is missing"
+        assert "shows help output" in instructions, "test_second's entry is missing"
+
+    def test_rerecording_replaces_existing_entry(self, tmp_path):
+        """
+        When re-recording the same instruction, the entry should be replaced,
+        not duplicated.
+        """
+        cassette_path = tmp_path / "test.json"
+        os.environ["RECORD_MODE"] = "all"
+
+        # First recording
+        cache1 = Cache.from_env(cassette_path)
+        cache1.put(
+            instruction="shows version",
+            screen="v1.0.0",
+            response="old response",
+            method="expect",
+            assertion_code="assert 'v1.0.0' in screen",
+        )
+
+        # Verify initial state
+        data = json.loads(cassette_path.read_text())
+        assert len(data["entries"]) == 1
+        assert data["entries"][0]["assertion_code"] == "assert 'v1.0.0' in screen"
+
+        # Re-record the same instruction with updated assertion
+        cache2 = Cache.from_env(cassette_path)
+        cache2.put(
+            instruction="shows version",
+            screen="v2.0.0",
+            response="new response",
+            method="expect",
+            assertion_code="assert 'v2.0.0' in screen",
+        )
+
+        # Should still have only 1 entry, but updated
+        data = json.loads(cassette_path.read_text())
+        assert len(data["entries"]) == 1, "Entry should be replaced, not duplicated"
+        assert data["entries"][0]["assertion_code"] == "assert 'v2.0.0' in screen"
+
+    def test_different_methods_are_separate_entries(self, tmp_path):
+        """Same instruction but different methods should be separate entries."""
+        cassette_path = tmp_path / "test.json"
+        os.environ["RECORD_MODE"] = "all"
+
+        cache = Cache.from_env(cassette_path)
+
+        # Add expect entry
+        cache.put(
+            instruction="verify output",
+            screen="output",
+            response="expect response",
+            method="expect",
+            assertion_code="assert True",
+        )
+
+        # Add complete entry with same instruction
+        cache.put(
+            instruction="verify output",
+            screen="output",
+            response="complete response",
+            method="complete",
+        )
+
+        # Should have both entries
+        data = json.loads(cassette_path.read_text())
+        assert len(data["entries"]) == 2
+
+        methods = [e["method"] for e in data["entries"]]
+        assert "expect" in methods
+        assert "complete" in methods
+
+
+class TestCacheRecordModes:
+    """Test cache behavior in different record modes."""
+
+    def test_once_mode_loads_existing_cassette(self, tmp_path):
+        """ONCE mode should load existing cassette and not record."""
+        cassette_path = tmp_path / "test.json"
+
+        # Create cassette with initial entry
+        cassette_path.write_text(json.dumps({
+            "entries": [{
+                "instruction": "existing",
+                "screen": "screen",
+                "response": "response",
+                "method": "expect",
+            }]
+        }))
+
+        os.environ["RECORD_MODE"] = "once"
+        cache = Cache.from_env(cassette_path)
+
+        # Should have loaded the entry
+        assert len(cache._entries) == 1
+        assert cache._entries[0].instruction == "existing"
+        # Should not record (cassette exists)
+        assert not cache._should_record
+
+    def test_none_mode_loads_existing_cassette(self, tmp_path):
+        """NONE mode should load existing cassette."""
+        cassette_path = tmp_path / "test.json"
+
+        cassette_path.write_text(json.dumps({
+            "entries": [{
+                "instruction": "existing",
+                "screen": "screen",
+                "response": "response",
+                "method": "expect",
+            }]
+        }))
+
+        os.environ["RECORD_MODE"] = "none"
+        cache = Cache.from_env(cassette_path)
+
+        assert len(cache._entries) == 1
+        assert not cache._should_record
+
+    def test_all_mode_loads_existing_and_records(self, tmp_path):
+        """ALL mode should load existing cassette AND allow recording."""
+        cassette_path = tmp_path / "test.json"
+
+        cassette_path.write_text(json.dumps({
+            "entries": [{
+                "instruction": "existing",
+                "screen": "screen",
+                "response": "response",
+                "method": "expect",
+            }]
+        }))
+
+        os.environ["RECORD_MODE"] = "all"
+        cache = Cache.from_env(cassette_path)
+
+        # Should have loaded existing entries
+        assert len(cache._entries) == 1
+        assert cache._entries[0].instruction == "existing"
+        # Should allow recording
+        assert cache._should_record
+
+
+@pytest.fixture(autouse=True)
+def cleanup_env():
+    """Clean up environment variables after each test."""
+    yield
+    if "RECORD_MODE" in os.environ:
+        del os.environ["RECORD_MODE"]