diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 0e73ca190..58fbbe340 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "reflect" "strconv" "strings" "testing" @@ -39,6 +40,8 @@ func TestCheckpointType_Values(t *testing.T) { } func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { + t.Parallel() + // Create a temp directory for the test tempDir := t.TempDir() @@ -97,6 +100,110 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { } } +func TestCopyMetadataDir_AllowsLeadingDotsInFileName(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + metadataDir := filepath.Join(tempDir, "metadata") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + dotFile := filepath.Join(metadataDir, "..notes.txt") + if err := os.WriteFile(dotFile, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write file with leading dots: %v", err) + } + + store := NewGitStore(repo) + entries := make(map[string]object.TreeEntry) + if err := store.copyMetadataDir(metadataDir, "checkpoint/", entries); err != nil { + t.Fatalf("copyMetadataDir() error = %v", err) + } + + if _, ok := entries["checkpoint/..notes.txt"]; !ok { + t.Error("expected file with leading dots to be included") + } +} + +func TestAddDirectoryToEntriesWithAbsPath_AllowsLeadingDotsInFileName(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + entries := make(map[string]object.TreeEntry) + + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + // Filename that begins with ".." but is still inside the directory. + dotFile := filepath.Join(metadataDir, "..notes.txt") + if err := os.WriteFile(dotFile, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write file with leading dots: %v", err) + } + + relPath := filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, "test-session")) + err = addDirectoryToEntriesWithAbsPath(repo, metadataDir, relPath, entries) + if err != nil { + t.Fatalf("addDirectoryToEntriesWithAbsPath() error = %v", err) + } + + expectedPath := filepath.ToSlash(filepath.Join(relPath, "..notes.txt")) + if _, ok := entries[expectedPath]; !ok { + t.Errorf("expected file with leading dots to be included: %s", expectedPath) + } +} + +func TestAddDirectoryToEntriesWithAbsPath_NormalizesTreePathSeparators(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + entries := make(map[string]object.TreeEntry) + + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + filePath := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(filePath, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + // Simulate a windows-style relative path passed into tree construction. 
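+	// addDirectoryToEntriesWithAbsPath should normalize this to forward-slash tree paths, as the assertions below verify.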
+ relPath := `checkpoint\metadata\test-session` + if err := addDirectoryToEntriesWithAbsPath(repo, metadataDir, relPath, entries); err != nil { + t.Fatalf("addDirectoryToEntriesWithAbsPath() error = %v", err) + } + + expectedPath := "checkpoint/metadata/test-session/notes.txt" + if _, ok := entries[expectedPath]; !ok { + t.Fatalf("expected normalized path %q in entries", expectedPath) + } + for entryPath := range entries { + if strings.Contains(entryPath, `\`) { + t.Fatalf("entry path should use '/' separators, got %q", entryPath) + } + } +} + // TestWriteCommitted_AgentField verifies that the Agent field is written // to both metadata.json and the commit message trailer. func TestWriteCommitted_AgentField(t *testing.T) { @@ -285,6 +392,32 @@ func readLatestSessionMetadata(t *testing.T, repo *git.Repository, checkpointID return metadata } +func readFileFromCheckpointCommit(t *testing.T, repo *git.Repository, commitHash plumbing.Hash, filePath string) string { + t.Helper() + + commit, err := repo.CommitObject(commitHash) + if err != nil { + t.Fatalf("failed to get commit object: %v", err) + } + + tree, err := commit.Tree() + if err != nil { + t.Fatalf("failed to get commit tree: %v", err) + } + + file, err := tree.File(filePath) + if err != nil { + t.Fatalf("failed to find file %s: %v", filePath, err) + } + + content, err := file.Contents() + if err != nil { + t.Fatalf("failed to read file %s: %v", filePath, err) + } + + return content +} + // Note: Tests for Agents array and SessionCount fields have been removed // as those fields were removed from CommittedMetadata in the simplification. @@ -2857,6 +2990,727 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) { } } +func TestWriteCommitted_MetadataJSON_RedactsSecrets(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef4") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-metadata-json-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + Summary: &Summary{ + Intent: highEntropySecret, + }, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_UpdateSummary_RedactsSecrets(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef5") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-update-summary-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + if err := store.UpdateSummary(context.Background(), checkpointID, &Summary{Intent: highEntropySecret}); err != nil { + 
t.Fatalf("UpdateSummary() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef6") + + sessionID := "session_" + highEntropySecret + toolUseID := "toolu_" + highEntropySecret + transcriptIdentifier := "message_" + highEntropySecret + transcriptPath := ".claude/projects/" + highEntropySecret + "/transcript.jsonl" + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + TranscriptIdentifierAtStart: transcriptIdentifier, + SessionTranscriptPath: transcriptPath, + Summary: &Summary{ + Intent: highEntropySecret, + }, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.SessionID != sessionID { + t.Errorf("session metadata session_id = %q, want %q", metadata.SessionID, sessionID) + } + if metadata.ToolUseID != toolUseID { + t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) + } + if metadata.TranscriptIdentifierAtStart != transcriptIdentifier { + t.Errorf("session metadata transcript_identifier_at_start = %q, want %q", metadata.TranscriptIdentifierAtStart, transcriptIdentifier) + } + if metadata.TranscriptPath != transcriptPath { + t.Errorf("session metadata transcript_path = %q, want %q", metadata.TranscriptPath, transcriptPath) + } + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef7") + + sessionID := "session_" + highEntropySecret + toolUseID := "toolu_" + highEntropySecret + transcriptIdentifier := "message_" + highEntropySecret + transcriptPath := ".claude/projects/" + highEntropySecret + "/transcript.jsonl" + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + TranscriptIdentifierAtStart: transcriptIdentifier, + SessionTranscriptPath: transcriptPath, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + if err := 
store.UpdateSummary(context.Background(), checkpointID, &Summary{Intent: highEntropySecret}); err != nil { + t.Fatalf("UpdateSummary() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.SessionID != sessionID { + t.Errorf("session metadata session_id = %q, want %q", metadata.SessionID, sessionID) + } + if metadata.ToolUseID != toolUseID { + t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) + } + if metadata.TranscriptIdentifierAtStart != transcriptIdentifier { + t.Errorf("session metadata transcript_identifier_at_start = %q, want %q", metadata.TranscriptIdentifierAtStart, transcriptIdentifier) + } + if metadata.TranscriptPath != transcriptPath { + t.Errorf("session metadata transcript_path = %q, want %q", metadata.TranscriptPath, transcriptPath) + } + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_TaskSubagentTranscript_FallsBackToPlainTextRedaction(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef8") + + tempDir := t.TempDir() + subagentTranscriptPath := filepath.Join(tempDir, "subagent-transcript.jsonl") + invalidJSONL := `{"role":"assistant","content":"` + highEntropySecret + `"` + if err := os.WriteFile(subagentTranscriptPath, []byte(invalidJSONL), 0o644); err != nil { + t.Fatalf("failed to write subagent transcript: %v", err) + } + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-task-subagent-fallback-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + IsTask: true, + ToolUseID: "tool-1", + AgentID: "agent-1", + SubagentTranscriptPath: subagentTranscriptPath, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) + if err != nil { + t.Fatalf("failed to get metadata branch reference: %v", err) + } + + agentPath := checkpointID.Path() + "/tasks/tool-1/agent-agent-1.jsonl" + agentContent := readFileFromCheckpointCommit(t, repo, ref.Hash(), agentPath) + if strings.Contains(agentContent, highEntropySecret) { + t.Error("subagent transcript should not contain the secret after fallback redaction") + } + if !strings.Contains(agentContent, "REDACTED") { + t.Error("subagent transcript should contain REDACTED placeholder after fallback redaction") + } +} + +func TestWriteTemporaryTask_MetadataFiles_RedactSecrets(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + 
t.Fatalf("failed to add README: %v", err) + } + initialCommit, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }) + if err != nil { + t.Fatalf("failed to commit initial tree: %v", err) + } + + store := NewGitStore(repo) + + transcriptPath := filepath.Join(tempDir, "task-full.jsonl") + if err := os.WriteFile(transcriptPath, []byte(`{"role":"assistant","content":"`+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write task transcript: %v", err) + } + + agentTranscriptPath := filepath.Join(tempDir, "agent-transcript.jsonl") + if err := os.WriteFile(agentTranscriptPath, []byte(`{"role":"assistant","content":"agent `+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write agent transcript: %v", err) + } + + commitHash, err := store.WriteTemporaryTask(context.Background(), WriteTemporaryTaskOptions{ + SessionID: "redact-temp-task-session", + BaseCommit: initialCommit.String(), + ToolUseID: "tool-1", + AgentID: "agent-1", + TranscriptPath: transcriptPath, + SubagentTranscriptPath: agentTranscriptPath, + CommitMessage: "task checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteTemporaryTask() error = %v", err) + } + + metadataDir := strings.TrimSuffix(paths.EntireMetadataDir, "/") + "/redact-temp-task-session" + transcriptFile := metadataDir + "/" + paths.TranscriptFileName + subagentFile := strings.TrimSuffix(paths.EntireMetadataDir, "/") + "/redact-temp-task-session/tasks/tool-1/agent-agent-1.jsonl" + + transcriptContent := readFileFromCheckpointCommit(t, repo, commitHash, transcriptFile) + if strings.Contains(transcriptContent, highEntropySecret) { + t.Error("task transcript should not contain the secret after redaction") + } + if !strings.Contains(transcriptContent, "REDACTED") { + t.Error("task transcript should contain REDACTED placeholder") + } + + subagentContent := readFileFromCheckpointCommit(t, repo, commitHash, subagentFile) + if strings.Contains(subagentContent, highEntropySecret) { + t.Error("subagent transcript should not contain the secret after redaction") + } + if !strings.Contains(subagentContent, "REDACTED") { + t.Error("subagent transcript should contain REDACTED placeholder") + } +} + +func TestWriteTemporary_MetadataDir_RedactsSecrets(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + t.Fatalf("failed to add README: %v", err) + } + if _, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }); err != nil { + t.Fatalf("failed to create initial commit: %v", err) + } + + origDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current working directory: %v", err) + } + t.Cleanup(func() { + t.Chdir(origDir) + }) + + // Ensure paths.RepoRoot() resolves to this repo in this test. 
+ t.Chdir(tempDir) + + store := NewGitStore(repo) + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + fullPath := filepath.Join(metadataDir, "full.jsonl") + if err := os.WriteFile(fullPath, []byte(`{"content":"secret=`+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write full.jsonl: %v", err) + } + + notePath := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(notePath, []byte("secret="+highEntropySecret), 0644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + head, err := repo.Head() + if err != nil { + t.Fatalf("failed to get HEAD: %v", err) + } + + result, err := store.WriteTemporary(context.Background(), WriteTemporaryOptions{ + SessionID: "redact-temp-commit-session", + BaseCommit: head.Hash().String(), + ModifiedFiles: []string{"README.md"}, + MetadataDir: ".entire/metadata/test-session", + MetadataDirAbs: metadataDir, + CommitMessage: "checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + IsFirstCheckpoint: false, + }) + if err != nil { + t.Fatalf("WriteTemporary() error = %v", err) + } + + full := readFileFromCheckpointCommit(t, repo, result.CommitHash, ".entire/metadata/test-session/full.jsonl") + if strings.Contains(full, highEntropySecret) { + t.Error("full.jsonl should not contain the secret after redaction") + } + if !strings.Contains(full, "REDACTED") { + t.Error("full.jsonl should contain REDACTED placeholder") + } + + notes := readFileFromCheckpointCommit(t, repo, result.CommitHash, ".entire/metadata/test-session/notes.txt") + if strings.Contains(notes, highEntropySecret) { + t.Error("notes.txt should not contain the secret after redaction") + } + if !strings.Contains(notes, "REDACTED") { + t.Error("notes.txt should contain REDACTED placeholder") + } +} + +func TestWriteTemporary_MetadataDir_SkipsSymlinks(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + t.Fatalf("failed to add README: %v", err) + } + if _, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }); err != nil { + t.Fatalf("failed to create initial commit: %v", err) + } + + origDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current working directory: %v", err) + } + t.Cleanup(func() { + t.Chdir(origDir) + }) + t.Chdir(tempDir) + + store := NewGitStore(repo) + sessionID := "redact-temp-symlink-session" + metadataDir := filepath.Join(tempDir, ".entire", "metadata", sessionID) + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + regularFile := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(regularFile, []byte("secret="+highEntropySecret), 0644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + sensitiveFile := filepath.Join(tempDir, "sensitive.txt") + if err := os.WriteFile(sensitiveFile, []byte("very-secret-data"), 0644); err != nil { + 
t.Fatalf("failed to write sensitive file: %v", err) + } + symlinkPath := filepath.Join(metadataDir, "sneaky-link") + if err := os.Symlink(sensitiveFile, symlinkPath); err != nil { + t.Fatalf("failed to create symlink: %v", err) + } + + head, err := repo.Head() + if err != nil { + t.Fatalf("failed to get HEAD: %v", err) + } + + result, err := store.WriteTemporary(context.Background(), WriteTemporaryOptions{ + SessionID: sessionID, + BaseCommit: head.Hash().String(), + ModifiedFiles: []string{"README.md"}, + MetadataDir: filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID)), + MetadataDirAbs: metadataDir, + CommitMessage: "checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + IsFirstCheckpoint: false, + }) + if err != nil { + t.Fatalf("WriteTemporary() error = %v", err) + } + + regular := readFileFromCheckpointCommit(t, repo, result.CommitHash, filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID, "notes.txt"))) + if strings.Contains(regular, highEntropySecret) { + t.Error("notes.txt should not contain the secret after redaction") + } + if !strings.Contains(regular, "REDACTED") { + t.Error("notes.txt should contain REDACTED placeholder") + } + + commit, err := repo.CommitObject(result.CommitHash) + if err != nil { + t.Fatalf("failed to get checkpoint commit object: %v", err) + } + tree, err := commit.Tree() + if err != nil { + t.Fatalf("failed to get checkpoint tree: %v", err) + } + if _, err := tree.File(filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID, "sneaky-link"))); err == nil { + t.Error("sneaky-link should not be included in temporary checkpoint metadata") + } +} + +// TestRedactSummary_CoversAllFields guards against Summary fields being added +// without updating redactSummary(). It uses struct-shape tripwires and +// reflection to verify that every string field is redacted, no field is +// silently zeroed, and non-string data is preserved unchanged. +func TestRedactSummary_CoversAllFields(t *testing.T) { + t.Parallel() + + // Tripwire: fail when struct shape changes. + assertFieldCount(t, Summary{}, 5, "Summary") + assertFieldCount(t, LearningsSummary{}, 3, "LearningsSummary") + assertFieldCount(t, CodeLearning{}, 4, "CodeLearning") + + // Auto-populate every field: strings get a secret marker, + // ints get a non-zero sentinel, slices get one element. + input := &Summary{} + fillStructFields(reflect.ValueOf(input).Elem(), highEntropySecret) + + // Sanity: every scalar in the auto-filled input must be non-zero. + // Catches fillStructFields silently skipping a new field kind. + assertAllScalarsNonZero(t, reflect.ValueOf(input), "input") + + result := redactSummary(input) + if result == nil { + t.Fatal("redactSummary returned nil for non-nil input") + } + + inputStrings := collectStringFields(reflect.ValueOf(input), "") + resultStrings := collectStringFields(reflect.ValueOf(result), "") + + // Key sets must match exactly. + for path := range inputStrings { + if _, ok := resultStrings[path]; !ok { + t.Errorf("field %s present in input but missing in result — redactSummary does not handle it", path) + } + } + for path := range resultStrings { + if _, ok := inputStrings[path]; !ok { + t.Errorf("unexpected field %s in result but not in input", path) + } + } + + // Every input string must be non-empty (auto-fill sanity check). 
+ for path, val := range inputStrings { + if val == "" { + t.Fatalf("auto-fill bug: field %s is empty in input", path) + } + } + + // No result string may be empty (silently zeroed) or contain the raw secret. + for path, val := range resultStrings { + if val == "" { + t.Errorf("field %s was silently zeroed — redactSummary does not copy this field", path) + } + if strings.Contains(val, highEntropySecret) { + t.Errorf("field %s still contains raw secret after redaction", path) + } + } + + // Non-string invariant: only string content may differ. + // Deep-copy both, normalize all strings to a fixed token, then DeepEqual. + inputNorm := jsonRoundTripSummary(t, input) + resultNorm := jsonRoundTripSummary(t, result) + setAllStrings(reflect.ValueOf(inputNorm).Elem(), "X") + setAllStrings(reflect.ValueOf(resultNorm).Elem(), "X") + if !reflect.DeepEqual(inputNorm, resultNorm) { + t.Errorf("redactSummary altered non-string data:\n input (normalized): %+v\n result (normalized): %+v", inputNorm, resultNorm) + } +} + +func assertFieldCount(t *testing.T, v any, expected int, name string) { + t.Helper() + actual := reflect.TypeOf(v).NumField() + if actual != expected { + t.Fatalf("%s has %d fields (expected %d) — update redactSummary() and this test", + name, actual, expected) + } +} + +// fillStructFields recursively populates every field in a struct with +// non-zero values: strings → secret, bools → true, ints/uints → 7, +// floats → 7.5, pointers → allocate and recurse, slices → one element. +func fillStructFields(v reflect.Value, secret string) { + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + fillStructFields(v.Elem(), secret) + case reflect.Struct: + for i := range v.NumField() { + fillStructFields(v.Field(i), secret) + } + case reflect.String: + v.SetString(secret) + case reflect.Bool: + v.SetBool(true) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + v.SetInt(7) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + v.SetUint(7) + case reflect.Float32, reflect.Float64: + v.SetFloat(7.5) + case reflect.Slice: + s := reflect.MakeSlice(v.Type(), 1, 1) + fillStructFields(s.Index(0), secret) + v.Set(s) + } +} + +// assertAllScalarsNonZero walks a struct and fatals if any scalar field +// (string, bool, int, uint, float) is at its zero value. This guards +// against fillStructFields silently skipping a new field kind. +func assertAllScalarsNonZero(t *testing.T, v reflect.Value, prefix string) { + t.Helper() + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return + } + v = v.Elem() + } + switch v.Kind() { + case reflect.Struct: + tp := v.Type() + for i := range tp.NumField() { + name := tp.Field(i).Name + p := name + if prefix != "" { + p = prefix + "." 
+ name + } + assertAllScalarsNonZero(t, v.Field(i), p) + } + case reflect.Slice: + for i := range v.Len() { + assertAllScalarsNonZero(t, v.Index(i), fmt.Sprintf("%s[%d]", prefix, i)) + } + case reflect.String: + if v.String() == "" { + t.Fatalf("auto-fill bug: %s is empty string", prefix) + } + case reflect.Bool: + if !v.Bool() { + t.Fatalf("auto-fill bug: %s is false", prefix) + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if v.Int() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + if v.Uint() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + case reflect.Float32, reflect.Float64: + if v.Float() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + } +} + +// collectStringFields recursively extracts all string values keyed by +// dotted path (e.g. "Learnings.Code[0].Path"). +func collectStringFields(v reflect.Value, prefix string) map[string]string { + out := make(map[string]string) + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return out + } + v = v.Elem() + } + switch v.Kind() { + case reflect.Struct: + t := v.Type() + for i := range t.NumField() { + name := t.Field(i).Name + p := name + if prefix != "" { + p = prefix + "." + name + } + for k, val := range collectStringFields(v.Field(i), p) { + out[k] = val + } + } + case reflect.Slice: + for i := range v.Len() { + p := fmt.Sprintf("%s[%d]", prefix, i) + for k, val := range collectStringFields(v.Index(i), p) { + out[k] = val + } + } + case reflect.String: + out[prefix] = v.String() + } + return out +} + +// setAllStrings recursively sets every string field/element to token. +func setAllStrings(v reflect.Value, token string) { + switch v.Kind() { + case reflect.Ptr: + if !v.IsNil() { + setAllStrings(v.Elem(), token) + } + case reflect.Struct: + for i := range v.NumField() { + setAllStrings(v.Field(i), token) + } + case reflect.Slice: + for i := range v.Len() { + setAllStrings(v.Index(i), token) + } + case reflect.String: + v.SetString(token) + } +} + +// jsonRoundTripSummary returns a deep copy of s via JSON round-trip. +func jsonRoundTripSummary(t *testing.T, s *Summary) *Summary { + t.Helper() + data, err := json.Marshal(s) + if err != nil { + t.Fatalf("failed to marshal Summary for deep copy: %v", err) + } + var cp Summary + if err := json.Unmarshal(data, &cp); err != nil { + t.Fatalf("failed to unmarshal Summary for deep copy: %v", err) + } + return &cp +} + // TestWriteCommitted_CLIVersionField verifies that buildinfo.Version is written // to both the root CheckpointSummary and session-level CommittedMetadata. 
func TestWriteCommitted_CLIVersionField(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 67cf6947c..aeae5d0b0 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -204,10 +204,17 @@ func (s *GitStore) writeFinalTaskCheckpoint(opts WriteCommittedOptions, taskPath if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath) if readErr == nil { - agentContent, readErr = redact.JSONLBytes(agentContent) - } - if readErr == nil { - agentBlobHash, agentBlobErr := CreateBlobFromContent(s.repo, agentContent) + redactedAgentContent, redactErr := redact.JSONLBytes(agentContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact subagent transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + slog.String("agent_id", opts.AgentID), + ) + redactedAgentContent = redact.Bytes(agentContent) + } + + agentBlobHash, agentBlobErr := CreateBlobFromContent(s.repo, redactedAgentContent) if agentBlobErr == nil { agentPath := taskPath + "agent-" + opts.AgentID + ".jsonl" entries[agentPath] = object.TreeEntry{ @@ -346,7 +353,7 @@ func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessio TranscriptLinesAtStart: opts.CheckpointTranscriptStart, // Deprecated: kept for backward compat TokenUsage: opts.TokenUsage, InitialAttribution: opts.InitialAttribution, - Summary: opts.Summary, + Summary: redactSummary(opts.Summary), CLIVersion: buildinfo.Version, TranscriptPath: opts.SessionTranscriptPath, } @@ -969,7 +976,7 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint } // Update the summary - existingMetadata.Summary = summary + existingMetadata.Summary = redactSummary(summary) // Write updated session metadata metadataJSON, err := jsonutil.MarshalIndentWithNewline(existingMetadata, "", " ") @@ -1008,6 +1015,48 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint return nil } +func redactSummary(summary *Summary) *Summary { + if summary == nil { + return nil + } + + redacted := &Summary{ + Intent: redact.String(summary.Intent), + Outcome: redact.String(summary.Outcome), + Learnings: LearningsSummary{ + Repo: redactStringSlice(summary.Learnings.Repo), + Workflow: redactStringSlice(summary.Learnings.Workflow), + }, + Friction: redactStringSlice(summary.Friction), + OpenItems: redactStringSlice(summary.OpenItems), + } + + if summary.Learnings.Code != nil { + redacted.Learnings.Code = make([]CodeLearning, len(summary.Learnings.Code)) + for i, learning := range summary.Learnings.Code { + redacted.Learnings.Code[i] = CodeLearning{ + Path: redact.String(learning.Path), + Line: learning.Line, + EndLine: learning.EndLine, + Finding: redact.String(learning.Finding), + } + } + } + + return redacted +} + +func redactStringSlice(values []string) []string { + if values == nil { + return nil + } + redacted := make([]string, len(values)) + for i, value := range values { + redacted[i] = redact.String(value) + } + return redacted +} + // ensureSessionsBranch ensures the entire/checkpoints/v1 branch exists. 
func (s *GitStore) ensureSessionsBranch() error { refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) @@ -1113,9 +1162,10 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str if err != nil { return fmt.Errorf("failed to get relative path for %s: %w", path, err) } + relPath = filepath.Clean(relPath) // Prevent path traversal via symlinks pointing outside the metadata dir - if strings.HasPrefix(relPath, "..") { + if relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) { return fmt.Errorf("path traversal detected: %s", relPath) } diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 15dee61ca..89dcba6ac 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -347,11 +347,20 @@ func (s *GitStore) addTaskMetadataToTree(baseTreeHash plumbing.Hash, opts WriteT // Add session transcript (with chunking support for large transcripts) if opts.TranscriptPath != "" { if transcriptContent, readErr := os.ReadFile(opts.TranscriptPath); readErr == nil { + redactedTranscriptContent, redactErr := redact.JSONLBytes(transcriptContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact task transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + ) + redactedTranscriptContent = redact.Bytes(transcriptContent) + } + // Detect agent type from content for proper chunking - agentType := agent.DetectAgentTypeFromContent(transcriptContent) + agentType := agent.DetectAgentTypeFromContent(redactedTranscriptContent) // Chunk if necessary - chunks, chunkErr := agent.ChunkTranscript(transcriptContent, agentType) + chunks, chunkErr := agent.ChunkTranscript(redactedTranscriptContent, agentType) if chunkErr != nil { logging.Warn(context.Background(), "failed to chunk transcript, checkpoint will be saved without transcript", slog.String("error", chunkErr.Error()), @@ -382,7 +391,17 @@ func (s *GitStore) addTaskMetadataToTree(baseTreeHash plumbing.Hash, opts WriteT // Add subagent transcript if available if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { if agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath); readErr == nil { - if blobHash, blobErr := CreateBlobFromContent(s.repo, agentContent); blobErr == nil { + redactedAgentContent, redactErr := redact.JSONLBytes(agentContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact subagent transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + slog.String("agent_id", opts.AgentID), + ) + redactedAgentContent = redact.Bytes(agentContent) + } + + if blobHash, blobErr := CreateBlobFromContent(s.repo, redactedAgentContent); blobErr == nil { agentPath := taskMetadataDir + "/agent-" + opts.AgentID + ".jsonl" entries[agentPath] = object.TreeEntry{ Name: agentPath, @@ -880,19 +899,25 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR if info.IsDir() { return nil } + if info.Mode()&os.ModeSymlink != 0 { + return nil + } - // Calculate relative path within the directory, then join with dirPathRel for tree entry relWithinDir, err := filepath.Rel(dirPathAbs, path) if err != nil { return fmt.Errorf("failed to get relative path for %s: %w", path, err) } + relWithinDir = filepath.Clean(relWithinDir) + if relWithinDir == ".." 
|| strings.HasPrefix(relWithinDir, ".."+string(filepath.Separator)) { + return fmt.Errorf("path traversal detected: %s", relWithinDir) + } - blobHash, mode, err := createBlobFromFile(repo, path) + treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) + treePath = strings.ReplaceAll(treePath, `\`, "/") + blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) } - - treePath := filepath.Join(dirPathRel, relWithinDir) entries[treePath] = object.TreeEntry{ Name: treePath, Mode: mode, diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 07aec19c6..ffd417046 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -1479,10 +1479,7 @@ func TestShadowStrategy_CondenseSession_EphemeralBranchTrailer(t *testing.T) { t.Fatalf("failed to create metadata dir: %v", err) } - //nolint:goconst // test data repeated across test functions - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript if err := os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644); err != nil { t.Fatalf("failed to write transcript: %v", err) } diff --git a/cmd/entire/cli/strategy/phase_postcommit_test.go b/cmd/entire/cli/strategy/phase_postcommit_test.go index 0d1115908..dfc3f8472 100644 --- a/cmd/entire/cli/strategy/phase_postcommit_test.go +++ b/cmd/entire/cli/strategy/phase_postcommit_test.go @@ -20,6 +20,10 @@ import ( "github.com/stretchr/testify/require" ) +const testSessionTranscript = `{"type":"human","message":{"content":"test prompt"}} +{"type":"assistant","message":{"content":"test response"}} +` + // TestPostCommit_ActiveSession_NoCondensation verifies that PostCommit on an // ACTIVE session transitions to ACTIVE_COMMITTED without condensing. // The shadow branch must be preserved because the session is still active. @@ -1256,9 +1260,7 @@ func setupSessionWithFileChange(t *testing.T, s *ManualCommitStrategy, _ *git.Re metadataDirAbs := filepath.Join(dir, metadataDir) require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755)) - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript require.NoError(t, os.WriteFile( filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644)) @@ -1288,9 +1290,7 @@ func setupSessionWithCheckpoint(t *testing.T, s *ManualCommitStrategy, _ *git.Re metadataDirAbs := filepath.Join(dir, metadataDir) require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755)) - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript require.NoError(t, os.WriteFile( filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644)) diff --git a/redact/redact.go b/redact/redact.go index f9212b384..815e7e272 100644 --- a/redact/redact.go +++ b/redact/redact.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "math" + "reflect" "regexp" "sort" "strings" @@ -128,6 +129,20 @@ func JSONLBytes(b []byte) ([]byte, error) { return []byte(redacted), nil } +// JSONBytes is a convenience wrapper around JSONContent for []byte content. 
+// JSON content is redacted with JSON-aware field skipping (e.g. keys ending in "id").
+func JSONBytes(b []byte) ([]byte, error) {
+	s := string(b)
+	redacted, err := JSONContent(s)
+	if err != nil {
+		return nil, err
+	}
+	if redacted == s {
+		return b, nil
+	}
+	return []byte(redacted), nil
+}
+
 // JSONLContent parses each line as JSON to determine which string values
 // need redaction, then performs targeted replacements on the raw JSON bytes.
 // Lines with no secrets are returned unchanged, preserving original formatting.
@@ -170,9 +185,114 @@
 	return b.String(), nil
 }
 
+// JSONContent parses a JSON string and redacts string values while skipping
+// identifier fields (keys ending in "id") and non-user payload fields.
+func JSONContent(content string) (string, error) {
+	var parsed any
+	if err := json.Unmarshal([]byte(content), &parsed); err != nil {
+		return "", fmt.Errorf("unmarshal JSON content: %w", err)
+	}
+
+	redacted := redactJSONValue(parsed)
+	if reflect.DeepEqual(parsed, redacted) {
+		return content, nil
+	}
+
+	return marshalJSONPreservingFormatting(redacted, content)
+}
+
+// redactJSONValue walks parsed JSON and redacts string values while preserving
+// skipped keys (e.g. ending in "id") and non-user payload fields.
+func redactJSONValue(v any) any {
+	switch val := v.(type) {
+	case map[string]any:
+		if shouldSkipJSONLObject(val) {
+			return val
+		}
+		out := make(map[string]any, len(val))
+		for key, child := range val {
+			if shouldSkipJSONField(key) {
+				out[key] = child
+				continue
+			}
+			out[key] = redactJSONValue(child)
+		}
+		return out
+	case []any:
+		out := make([]any, len(val))
+		for i, child := range val {
+			out[i] = redactJSONValue(child)
+		}
+		return out
+	case string:
+		return String(val)
+	default:
+		return val
+	}
+}
+
+// marshalJSONPreservingFormatting marshals v while preserving basic formatting
+// characteristics from original (indentation and trailing newline).
+func marshalJSONPreservingFormatting(v any, original string) (string, error) {
+	trimmedOriginal := strings.TrimSuffix(original, "\n")
+	// Preserve compact style when JSON content itself is a single line,
+	// while still preserving an optional trailing newline.
+	if !strings.Contains(trimmedOriginal, "\n") {
+		out, err := json.Marshal(v)
+		if err != nil {
+			return "", fmt.Errorf("marshal JSON content: %w", err)
+		}
+		if strings.HasSuffix(original, "\n") {
+			out = append(out, '\n')
+		}
+		return string(out), nil
+	}
+
+	var buf bytes.Buffer
+	enc := json.NewEncoder(&buf)
+	indent := detectJSONIndent(original)
+	if indent == "" {
+		indent = " "
+	}
+	enc.SetIndent("", indent)
+	if err := enc.Encode(v); err != nil {
+		return "", fmt.Errorf("encode formatted JSON content: %w", err)
+	}
+
+	out := buf.String()
+	if !strings.HasSuffix(original, "\n") {
+		out = strings.TrimSuffix(out, "\n")
+	}
+	return out, nil
+}
+
+// detectJSONIndent returns the leading indentation used by the first indented
+// non-empty line in content.
+func detectJSONIndent(content string) string {
+	lines := strings.Split(content, "\n")
+	for i := 1; i < len(lines); i++ {
+		line := lines[i]
+		if strings.TrimSpace(line) == "" {
+			continue
+		}
+		trimmed := strings.TrimLeft(line, " \t")
+		if len(trimmed) == len(line) {
+			continue
+		}
+		return line[:len(line)-len(trimmed)]
+	}
+	return ""
+}
+
 // collectJSONLReplacements walks a parsed JSON value and collects unique
 // (original, redacted) string pairs for values that need redaction.
func collectJSONLReplacements(v any) [][2]string { + return collectJSONReplacements(v) +} + +// collectJSONReplacements walks a parsed JSON value and collects unique +// (original, redacted) string pairs for values that need redaction. +func collectJSONReplacements(v any) [][2]string { seen := make(map[string]bool) var repls [][2]string var walk func(v any) @@ -183,7 +303,7 @@ func collectJSONLReplacements(v any) [][2]string { return } for k, child := range val { - if shouldSkipJSONLField(k) { + if shouldSkipJSONField(k) { continue } walk(child) @@ -207,6 +327,12 @@ func collectJSONLReplacements(v any) [][2]string { // shouldSkipJSONLField returns true if a JSON key should be excluded from scanning/redaction. // Skips "signature" (exact) and any key ending in "id" (case-insensitive). func shouldSkipJSONLField(key string) bool { + return shouldSkipJSONField(key) +} + +// shouldSkipJSONField returns true if a JSON key should be excluded from scanning/redaction. +// Skips "signature" (exact) and any key ending in "id" (case-insensitive). +func shouldSkipJSONField(key string) bool { if key == "signature" { return true } diff --git a/redact/redact_test.go b/redact/redact_test.go index bdd896689..c5fadb7f5 100644 --- a/redact/redact_test.go +++ b/redact/redact_test.go @@ -2,14 +2,19 @@ package redact import ( "bytes" + "encoding/json" "slices" + "strings" "testing" ) // highEntropySecret is a string with Shannon entropy > 4.5 that will trigger redaction. const highEntropySecret = "sk-ant-api03-xK9mZ2vL8nQ5rT1wY4bC7dF0gH3jE6pA" +const redactedPlaceholder = "REDACTED" func TestBytes_NoSecrets(t *testing.T) { + t.Parallel() + input := []byte("hello world, this is normal text") result := Bytes(input) if string(result) != string(input) { @@ -22,6 +27,8 @@ func TestBytes_NoSecrets(t *testing.T) { } func TestBytes_WithSecret(t *testing.T) { + t.Parallel() + input := []byte("my key is " + highEntropySecret + " ok") result := Bytes(input) expected := []byte("my key is REDACTED ok") @@ -31,6 +38,8 @@ func TestBytes_WithSecret(t *testing.T) { } func TestJSONLBytes_NoSecrets(t *testing.T) { + t.Parallel() + input := []byte(`{"type":"text","content":"hello"}`) result, err := JSONLBytes(input) if err != nil { @@ -45,6 +54,8 @@ func TestJSONLBytes_NoSecrets(t *testing.T) { } func TestJSONLBytes_WithSecret(t *testing.T) { + t.Parallel() + input := []byte(`{"type":"text","content":"key=` + highEntropySecret + `"}`) result, err := JSONLBytes(input) if err != nil { @@ -57,6 +68,8 @@ func TestJSONLBytes_WithSecret(t *testing.T) { } func TestJSONLContent_TopLevelArray(t *testing.T) { + t.Parallel() + // Top-level JSON arrays are valid JSONL and should be redacted. input := `["` + highEntropySecret + `","normal text"]` result, err := JSONLContent(input) @@ -70,6 +83,8 @@ func TestJSONLContent_TopLevelArray(t *testing.T) { } func TestJSONLContent_TopLevelArrayNoSecrets(t *testing.T) { + t.Parallel() + input := `["hello","world"]` result, err := JSONLContent(input) if err != nil { @@ -81,6 +96,8 @@ func TestJSONLContent_TopLevelArrayNoSecrets(t *testing.T) { } func TestJSONLContent_InvalidJSONLine(t *testing.T) { + t.Parallel() + // Lines that aren't valid JSON should be processed with normal string redaction. 
input := `{"type":"text", "invalid ` + highEntropySecret + " json" result, err := JSONLContent(input) @@ -93,7 +110,141 @@ func TestJSONLContent_InvalidJSONLine(t *testing.T) { } } +func TestJSONBytes_PreservesIDFields(t *testing.T) { + t.Parallel() + + idFieldSecret := highEntropySecret + "-session-id" + intentSecret := highEntropySecret + input := `{"session_id":"` + idFieldSecret + `","intent":"` + intentSecret + `","agent_id":"` + idFieldSecret + `"}` + result, err := JSONBytes([]byte(input)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // If this field were not considered ID-like, it would be redacted. + if String(idFieldSecret) == idFieldSecret { + t.Fatal("id field secret should be redacted when not excluded") + } + + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + if got["session_id"] != idFieldSecret { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != redactedPlaceholder { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } + if got["agent_id"] != idFieldSecret { + t.Errorf("agent_id should be preserved, got %q", got["agent_id"]) + } +} + +func TestJSONBytes_DuplicateIDAndPayloadValues(t *testing.T) { + t.Parallel() + + dup := highEntropySecret + input := `{"session_id":"` + dup + `","intent":"` + dup + `","tool":"` + dup + `"}` + result, err := JSONBytes([]byte(input)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + + if got["session_id"] != dup { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != redactedPlaceholder { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } + if got["tool"] != redactedPlaceholder { + t.Errorf("tool should be redacted, got %q", got["tool"]) + } +} + +func TestJSONBytes_InvalidJSON(t *testing.T) { + t.Parallel() + + _, err := JSONBytes([]byte(`{"session":"bad"`)) + if err == nil { + t.Fatal("expected error for invalid JSON content") + } +} + +func TestJSONBytes_PreservesIndentedFormatting(t *testing.T) { + t.Parallel() + + input := []byte("{\n \"session_id\": \"sess-123\",\n \"intent\": \"" + highEntropySecret + "\"\n}\n") + + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.HasSuffix(string(result), "\n") { + t.Fatalf("expected trailing newline to be preserved, got %q", result) + } + if !strings.Contains(string(result), "\n \"") { + t.Fatalf("expected indented JSON formatting to be preserved, got %q", result) + } + + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + if got["session_id"] != "sess-123" { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != redactedPlaceholder { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } +} + +func TestJSONBytes_PreservesCompactFormatting(t *testing.T) { + t.Parallel() + + input := []byte("{\"intent\":\"" + highEntropySecret + "\"}") + + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if strings.Contains(string(result), "\n") { + t.Fatalf("expected compact JSON output without newline, got %q", result) + } + if !strings.Contains(string(result), "\"intent\":\"REDACTED\"") { + 
t.Fatalf("expected compact redacted output, got %q", result) + } +} + +func TestJSONBytes_PreservesCompactWithTrailingNewline(t *testing.T) { + t.Parallel() + + input := []byte("{\"intent\":\"" + highEntropySecret + "\"}\n") + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.HasSuffix(string(result), "\n") { + t.Fatalf("expected trailing newline to be preserved, got %q", result) + } + if strings.Count(string(result), "\n") != 1 { + t.Fatalf("expected compact single-line output with trailing newline, got %q", result) + } + if !strings.Contains(string(result), "\"intent\":\"REDACTED\"") { + t.Fatalf("expected compact redacted output, got %q", result) + } +} + func TestCollectJSONLReplacements_Succeeds(t *testing.T) { + t.Parallel() + obj := map[string]any{ "content": "token=" + highEntropySecret, } @@ -106,6 +257,8 @@ func TestCollectJSONLReplacements_Succeeds(t *testing.T) { } func TestShouldSkipJSONLField(t *testing.T) { + t.Parallel() + tests := []struct { key string want bool @@ -144,6 +297,8 @@ func TestShouldSkipJSONLField(t *testing.T) { } func TestShouldSkipJSONLField_RedactionBehavior(t *testing.T) { + t.Parallel() + // Verify that secrets in skipped fields are preserved (not redacted). obj := map[string]any{ "session_id": highEntropySecret, @@ -160,6 +315,8 @@ func TestShouldSkipJSONLField_RedactionBehavior(t *testing.T) { } func TestString_PatternDetection(t *testing.T) { + t.Parallel() + // These secrets have entropy below 4.5 so entropy-only detection misses them. // Gitleaks pattern matching should catch them. tests := []struct { @@ -202,6 +359,8 @@ func TestString_PatternDetection(t *testing.T) { } func TestShouldSkipJSONLObject(t *testing.T) { + t.Parallel() + tests := []struct { name string obj map[string]any @@ -249,6 +408,8 @@ func TestShouldSkipJSONLObject(t *testing.T) { } func TestShouldSkipJSONLObject_RedactionBehavior(t *testing.T) { + t.Parallel() + // Verify that secrets inside image objects are NOT redacted. obj := map[string]any{ "type": "image",