diff --git a/src/noot/cache.py b/src/noot/cache.py index b50cfb7..a020eaf 100644 --- a/src/noot/cache.py +++ b/src/noot/cache.py @@ -114,9 +114,10 @@ def from_env(cls, cassette_path: Path | None = None) -> "Cache": cassette_exists = cassette_path and cassette_path.exists() if mode == RecordMode.ALL: - # Always record, clear any existing entries + # Always record, but preserve entries from other tests cache._should_record = True - cache._entries = [] + if cassette_exists: + cache._load() # Load existing entries to preserve other tests' entries elif mode == RecordMode.NONE: # Replay only cache._should_record = False @@ -202,6 +203,9 @@ def put( """ Store a response in the cache. + If an entry with the same instruction and method already exists, + it will be replaced (to support re-recording). + Args: instruction: The instruction sent to the LLM screen: The screen content at time of request @@ -212,15 +216,22 @@ def put( if not self._should_record: return - self._entries.append( - CacheEntry( - instruction=instruction, - screen=screen, - response=response, - method=method, - assertion_code=assertion_code, - ) + new_entry = CacheEntry( + instruction=instruction, + screen=screen, + response=response, + method=method, + assertion_code=assertion_code, ) + + # Replace existing entry with same instruction/method, or append if new + for i, entry in enumerate(self._entries): + if entry.instruction == instruction and entry.method == method: + self._entries[i] = new_entry + self.save() + return + + self._entries.append(new_entry) self.save() @property diff --git a/tests/test_cache.py b/tests/test_cache.py new file mode 100644 index 0000000..3b8b625 --- /dev/null +++ b/tests/test_cache.py @@ -0,0 +1,210 @@ +"""Unit tests for the Cache class.""" + +import json +import os + +import pytest + +from noot.cache import Cache + + +class TestCacheMultipleInstances: + """Test that multiple cache instances sharing the same cassette preserve entries.""" + + def test_multiple_cache_instances_preserve_entries_in_all_mode(self, tmp_path): + """ + Bug fix test: Multiple cache instances sharing the same default cassette + should preserve each other's entries when using RECORD_MODE=all. + + This reproduces the bug reported where: + 1. test_first records its entries to default.json + 2. test_second creates a new Cache, overwrites default.json + 3. test_first's entries are lost + + The fix ensures entries are loaded before recording in ALL mode. + """ + cassette_path = tmp_path / "default.json" + + # Simulate test_first: Create first cache instance and record entries + os.environ["RECORD_MODE"] = "all" + cache1 = Cache.from_env(cassette_path) + cache1.put( + instruction="shows version output", + screen="$ gt version\nv1.0.0", + response="assert 'v1.0.0' in screen", + method="expect", + assertion_code="assert 'v1.0.0' in screen", + ) + + # Verify first entry was saved + data = json.loads(cassette_path.read_text()) + assert len(data["entries"]) == 1 + assert data["entries"][0]["instruction"] == "shows version output" + + # Simulate test_second: Create second cache instance and record entries + cache2 = Cache.from_env(cassette_path) + cache2.put( + instruction="shows help output", + screen="$ gt --help\nUsage: gt [OPTIONS]", + response="assert 'Usage' in screen", + method="expect", + assertion_code="assert 'Usage' in screen", + ) + + # Verify BOTH entries are preserved + # (this was the bug - only second entry existed before fix) + data = json.loads(cassette_path.read_text()) + assert len(data["entries"]) == 2, ( + f"Expected 2 entries but got {len(data['entries'])}. " + "First test's entries were lost!" + ) + + instructions = [e["instruction"] for e in data["entries"]] + assert "shows version output" in instructions, "test_first's entry is missing" + assert "shows help output" in instructions, "test_second's entry is missing" + + def test_rerecording_replaces_existing_entry(self, tmp_path): + """ + When re-recording the same instruction, the entry should be replaced, + not duplicated. + """ + cassette_path = tmp_path / "test.json" + os.environ["RECORD_MODE"] = "all" + + # First recording + cache1 = Cache.from_env(cassette_path) + cache1.put( + instruction="shows version", + screen="v1.0.0", + response="old response", + method="expect", + assertion_code="assert 'v1.0.0' in screen", + ) + + # Verify initial state + data = json.loads(cassette_path.read_text()) + assert len(data["entries"]) == 1 + assert data["entries"][0]["assertion_code"] == "assert 'v1.0.0' in screen" + + # Re-record the same instruction with updated assertion + cache2 = Cache.from_env(cassette_path) + cache2.put( + instruction="shows version", + screen="v2.0.0", + response="new response", + method="expect", + assertion_code="assert 'v2.0.0' in screen", + ) + + # Should still have only 1 entry, but updated + data = json.loads(cassette_path.read_text()) + assert len(data["entries"]) == 1, "Entry should be replaced, not duplicated" + assert data["entries"][0]["assertion_code"] == "assert 'v2.0.0' in screen" + + def test_different_methods_are_separate_entries(self, tmp_path): + """Same instruction but different methods should be separate entries.""" + cassette_path = tmp_path / "test.json" + os.environ["RECORD_MODE"] = "all" + + cache = Cache.from_env(cassette_path) + + # Add expect entry + cache.put( + instruction="verify output", + screen="output", + response="expect response", + method="expect", + assertion_code="assert True", + ) + + # Add complete entry with same instruction + cache.put( + instruction="verify output", + screen="output", + response="complete response", + method="complete", + ) + + # Should have both entries + data = json.loads(cassette_path.read_text()) + assert len(data["entries"]) == 2 + + methods = [e["method"] for e in data["entries"]] + assert "expect" in methods + assert "complete" in methods + + +class TestCacheRecordModes: + """Test cache behavior in different record modes.""" + + def test_once_mode_loads_existing_cassette(self, tmp_path): + """ONCE mode should load existing cassette and not record.""" + cassette_path = tmp_path / "test.json" + + # Create cassette with initial entry + cassette_path.write_text(json.dumps({ + "entries": [{ + "instruction": "existing", + "screen": "screen", + "response": "response", + "method": "expect", + }] + })) + + os.environ["RECORD_MODE"] = "once" + cache = Cache.from_env(cassette_path) + + # Should have loaded the entry + assert len(cache._entries) == 1 + assert cache._entries[0].instruction == "existing" + # Should not record (cassette exists) + assert not cache._should_record + + def test_none_mode_loads_existing_cassette(self, tmp_path): + """NONE mode should load existing cassette.""" + cassette_path = tmp_path / "test.json" + + cassette_path.write_text(json.dumps({ + "entries": [{ + "instruction": "existing", + "screen": "screen", + "response": "response", + "method": "expect", + }] + })) + + os.environ["RECORD_MODE"] = "none" + cache = Cache.from_env(cassette_path) + + assert len(cache._entries) == 1 + assert not cache._should_record + + def test_all_mode_loads_existing_and_records(self, tmp_path): + """ALL mode should load existing cassette AND allow recording.""" + cassette_path = tmp_path / "test.json" + + cassette_path.write_text(json.dumps({ + "entries": [{ + "instruction": "existing", + "screen": "screen", + "response": "response", + "method": "expect", + }] + })) + + os.environ["RECORD_MODE"] = "all" + cache = Cache.from_env(cassette_path) + + # Should have loaded existing entries + assert len(cache._entries) == 1 + assert cache._entries[0].instruction == "existing" + # Should allow recording + assert cache._should_record + + +@pytest.fixture(autouse=True) +def cleanup_env(): + """Clean up environment variables after each test.""" + yield + if "RECORD_MODE" in os.environ: + del os.environ["RECORD_MODE"]