From f09af6a4f2d147366cdd4257c3234b003853d3e3 Mon Sep 17 00:00:00 2001 From: Andrei David Date: Sat, 14 Feb 2026 01:37:19 +0000 Subject: [PATCH 1/5] fix(checkpoint): redact secrets in committed and temporary metadata - redact committed session metadata JSON before persistence (WriteCommitted session payloads) - redact summary updates before rewriting committed metadata - redact temporary task transcripts and subagent transcripts before checkpoint write - redact metadata directory files when writing temporary checkpoints - add regression tests for committed summary and temporary metadata redaction paths --- cmd/entire/cli/checkpoint/checkpoint_test.go | 356 ++++++++++++++++++ cmd/entire/cli/checkpoint/committed.go | 2 + cmd/entire/cli/checkpoint/temporary.go | 37 +- cmd/entire/cli/strategy/manual_commit_test.go | 5 +- .../cli/strategy/phase_postcommit_test.go | 12 +- 5 files changed, 395 insertions(+), 17 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 0e73ca190..643fcf0b8 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -285,6 +285,32 @@ func readLatestSessionMetadata(t *testing.T, repo *git.Repository, checkpointID return metadata } +func readFileFromCheckpointCommit(t *testing.T, repo *git.Repository, commitHash plumbing.Hash, filePath string) string { + t.Helper() + + commit, err := repo.CommitObject(commitHash) + if err != nil { + t.Fatalf("failed to get commit object: %v", err) + } + + tree, err := commit.Tree() + if err != nil { + t.Fatalf("failed to get commit tree: %v", err) + } + + file, err := tree.File(filePath) + if err != nil { + t.Fatalf("failed to find file %s: %v", filePath, err) + } + + content, err := file.Contents() + if err != nil { + t.Fatalf("failed to read file %s: %v", filePath, err) + } + + return content +} + // Note: Tests for Agents array and SessionCount fields have been removed // as those fields were removed from CommittedMetadata in the simplification. @@ -2857,6 +2883,336 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) { } } +func TestWriteCommitted_MetadataJSON_RedactsSecrets(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef4") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-metadata-json-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + Summary: &Summary{ + Intent: highEntropySecret, + }, + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_UpdateSummary_RedactsSecrets(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef5") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-update-summary-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + if err := store.UpdateSummary(context.Background(), checkpointID, &Summary{Intent: highEntropySecret}); err != nil { + t.Fatalf("UpdateSummary() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteTemporaryTask_MetadataFiles_RedactSecrets(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + t.Fatalf("failed to add README: %v", err) + } + initialCommit, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }) + if err != nil { + t.Fatalf("failed to commit initial tree: %v", err) + } + + store := NewGitStore(repo) + + transcriptPath := filepath.Join(tempDir, "task-full.jsonl") + if err := os.WriteFile(transcriptPath, []byte(`{"role":"assistant","content":"`+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write task transcript: %v", err) + } + + agentTranscriptPath := filepath.Join(tempDir, "agent-transcript.jsonl") + if err := os.WriteFile(agentTranscriptPath, []byte(`{"role":"assistant","content":"agent `+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write agent transcript: %v", err) + } + + commitHash, err := store.WriteTemporaryTask(context.Background(), WriteTemporaryTaskOptions{ + SessionID: "redact-temp-task-session", + BaseCommit: initialCommit.String(), + ToolUseID: "tool-1", + AgentID: "agent-1", + TranscriptPath: transcriptPath, + SubagentTranscriptPath: agentTranscriptPath, + CommitMessage: "task checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteTemporaryTask() error = %v", err) + } + + metadataDir := strings.TrimSuffix(paths.EntireMetadataDir, "/") + "/redact-temp-task-session" + transcriptFile := metadataDir + "/" + paths.TranscriptFileName + subagentFile := strings.TrimSuffix(paths.EntireMetadataDir, "/") + "/redact-temp-task-session/tasks/tool-1/agent-agent-1.jsonl" + + transcriptContent := readFileFromCheckpointCommit(t, repo, commitHash, transcriptFile) + if strings.Contains(transcriptContent, highEntropySecret) { + t.Error("task transcript should not contain the secret after redaction") + } + if !strings.Contains(transcriptContent, "REDACTED") { + t.Error("task transcript should contain REDACTED placeholder") + } + + subagentContent := readFileFromCheckpointCommit(t, repo, commitHash, subagentFile) + if strings.Contains(subagentContent, highEntropySecret) { + t.Error("subagent transcript should not contain the secret after redaction") + } + if !strings.Contains(subagentContent, "REDACTED") { + t.Error("subagent transcript should contain REDACTED placeholder") + } +} + +func TestWriteTemporary_MetadataDir_RedactsSecrets(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + t.Fatalf("failed to add README: %v", err) + } + if _, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }); err != nil { + t.Fatalf("failed to create initial commit: %v", err) + } + + origDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current working directory: %v", err) + } + t.Cleanup(func() { + t.Chdir(origDir) + }) + + // Ensure paths.RepoRoot() resolves to this repo in this test. + t.Chdir(tempDir) + + store := NewGitStore(repo) + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + fullPath := filepath.Join(metadataDir, "full.jsonl") + if err := os.WriteFile(fullPath, []byte(`{"content":"secret=`+highEntropySecret+`"}`+"\n"), 0644); err != nil { + t.Fatalf("failed to write full.jsonl: %v", err) + } + + notePath := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(notePath, []byte("secret="+highEntropySecret), 0644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + head, err := repo.Head() + if err != nil { + t.Fatalf("failed to get HEAD: %v", err) + } + + result, err := store.WriteTemporary(context.Background(), WriteTemporaryOptions{ + SessionID: "redact-temp-commit-session", + BaseCommit: head.Hash().String(), + ModifiedFiles: []string{"README.md"}, + MetadataDir: ".entire/metadata/test-session", + MetadataDirAbs: metadataDir, + CommitMessage: "checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + IsFirstCheckpoint: false, + }) + if err != nil { + t.Fatalf("WriteTemporary() error = %v", err) + } + + full := readFileFromCheckpointCommit(t, repo, result.CommitHash, ".entire/metadata/test-session/full.jsonl") + if strings.Contains(full, highEntropySecret) { + t.Error("full.jsonl should not contain the secret after redaction") + } + if !strings.Contains(full, "REDACTED") { + t.Error("full.jsonl should contain REDACTED placeholder") + } + + notes := readFileFromCheckpointCommit(t, repo, result.CommitHash, ".entire/metadata/test-session/notes.txt") + if strings.Contains(notes, highEntropySecret) { + t.Error("notes.txt should not contain the secret after redaction") + } + if !strings.Contains(notes, "REDACTED") { + t.Error("notes.txt should contain REDACTED placeholder") + } +} + +func TestWriteTemporary_MetadataDir_SkipsSymlinks(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + worktree, err := repo.Worktree() + if err != nil { + t.Fatalf("failed to get worktree: %v", err) + } + + readmeFile := filepath.Join(tempDir, "README.md") + if err := os.WriteFile(readmeFile, []byte("# Test"), 0644); err != nil { + t.Fatalf("failed to write README: %v", err) + } + if _, err := worktree.Add("README.md"); err != nil { + t.Fatalf("failed to add README: %v", err) + } + if _, err := worktree.Commit("Initial commit", &git.CommitOptions{ + Author: &object.Signature{Name: "Test", Email: "test@example.com"}, + }); err != nil { + t.Fatalf("failed to create initial commit: %v", err) + } + + origDir, err := os.Getwd() + if err != nil { + t.Fatalf("failed to get current working directory: %v", err) + } + t.Cleanup(func() { + t.Chdir(origDir) + }) + t.Chdir(tempDir) + + store := NewGitStore(repo) + sessionID := "redact-temp-symlink-session" + metadataDir := filepath.Join(tempDir, ".entire", "metadata", sessionID) + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + regularFile := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(regularFile, []byte("secret="+highEntropySecret), 0644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + sensitiveFile := filepath.Join(tempDir, "sensitive.txt") + if err := os.WriteFile(sensitiveFile, []byte("very-secret-data"), 0644); err != nil { + t.Fatalf("failed to write sensitive file: %v", err) + } + symlinkPath := filepath.Join(metadataDir, "sneaky-link") + if err := os.Symlink(sensitiveFile, symlinkPath); err != nil { + t.Fatalf("failed to create symlink: %v", err) + } + + head, err := repo.Head() + if err != nil { + t.Fatalf("failed to get HEAD: %v", err) + } + + result, err := store.WriteTemporary(context.Background(), WriteTemporaryOptions{ + SessionID: sessionID, + BaseCommit: head.Hash().String(), + ModifiedFiles: []string{"README.md"}, + MetadataDir: filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID)), + MetadataDirAbs: metadataDir, + CommitMessage: "checkpoint", + AuthorName: "Test", + AuthorEmail: "test@example.com", + IsFirstCheckpoint: false, + }) + if err != nil { + t.Fatalf("WriteTemporary() error = %v", err) + } + + regular := readFileFromCheckpointCommit(t, repo, result.CommitHash, filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID, "notes.txt"))) + if strings.Contains(regular, highEntropySecret) { + t.Error("notes.txt should not contain the secret after redaction") + } + if !strings.Contains(regular, "REDACTED") { + t.Error("notes.txt should contain REDACTED placeholder") + } + + commit, err := repo.CommitObject(result.CommitHash) + if err != nil { + t.Fatalf("failed to get checkpoint commit object: %v", err) + } + tree, err := commit.Tree() + if err != nil { + t.Fatalf("failed to get checkpoint tree: %v", err) + } + if _, err := tree.File(filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, sessionID, "sneaky-link"))); err == nil { + t.Error("sneaky-link should not be included in temporary checkpoint metadata") + } +} + // TestWriteCommitted_CLIVersionField verifies that buildinfo.Version is written // to both the root CheckpointSummary and session-level CommittedMetadata. func TestWriteCommitted_CLIVersionField(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 67cf6947c..fd6d75ad1 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -355,6 +355,7 @@ func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessio if err != nil { return filePaths, fmt.Errorf("failed to marshal session metadata: %w", err) } + metadataJSON = redact.Bytes(metadataJSON) metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return filePaths, err @@ -976,6 +977,7 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint if err != nil { return fmt.Errorf("failed to marshal metadata: %w", err) } + metadataJSON = redact.Bytes(metadataJSON) metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return fmt.Errorf("failed to create metadata blob: %w", err) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 15dee61ca..8654493d4 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -347,11 +347,20 @@ func (s *GitStore) addTaskMetadataToTree(baseTreeHash plumbing.Hash, opts WriteT // Add session transcript (with chunking support for large transcripts) if opts.TranscriptPath != "" { if transcriptContent, readErr := os.ReadFile(opts.TranscriptPath); readErr == nil { + redactedTranscriptContent, redactErr := redact.JSONLBytes(transcriptContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact task transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + ) + redactedTranscriptContent = redact.Bytes(transcriptContent) + } + // Detect agent type from content for proper chunking - agentType := agent.DetectAgentTypeFromContent(transcriptContent) + agentType := agent.DetectAgentTypeFromContent(redactedTranscriptContent) // Chunk if necessary - chunks, chunkErr := agent.ChunkTranscript(transcriptContent, agentType) + chunks, chunkErr := agent.ChunkTranscript(redactedTranscriptContent, agentType) if chunkErr != nil { logging.Warn(context.Background(), "failed to chunk transcript, checkpoint will be saved without transcript", slog.String("error", chunkErr.Error()), @@ -382,7 +391,17 @@ func (s *GitStore) addTaskMetadataToTree(baseTreeHash plumbing.Hash, opts WriteT // Add subagent transcript if available if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { if agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath); readErr == nil { - if blobHash, blobErr := CreateBlobFromContent(s.repo, agentContent); blobErr == nil { + redactedAgentContent, redactErr := redact.JSONLBytes(agentContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact subagent transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + slog.String("agent_id", opts.AgentID), + ) + redactedAgentContent = redact.Bytes(agentContent) + } + + if blobHash, blobErr := CreateBlobFromContent(s.repo, redactedAgentContent); blobErr == nil { agentPath := taskMetadataDir + "/agent-" + opts.AgentID + ".jsonl" entries[agentPath] = object.TreeEntry{ Name: agentPath, @@ -880,19 +899,23 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR if info.IsDir() { return nil } + if info.Mode()&os.ModeSymlink != 0 { + return nil + } - // Calculate relative path within the directory, then join with dirPathRel for tree entry relWithinDir, err := filepath.Rel(dirPathAbs, path) if err != nil { return fmt.Errorf("failed to get relative path for %s: %w", path, err) } + if strings.HasPrefix(relWithinDir, "..") { + return fmt.Errorf("path traversal detected: %s", relWithinDir) + } - blobHash, mode, err := createBlobFromFile(repo, path) + treePath := filepath.Join(dirPathRel, relWithinDir) + blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) } - - treePath := filepath.Join(dirPathRel, relWithinDir) entries[treePath] = object.TreeEntry{ Name: treePath, Mode: mode, diff --git a/cmd/entire/cli/strategy/manual_commit_test.go b/cmd/entire/cli/strategy/manual_commit_test.go index 07aec19c6..ffd417046 100644 --- a/cmd/entire/cli/strategy/manual_commit_test.go +++ b/cmd/entire/cli/strategy/manual_commit_test.go @@ -1479,10 +1479,7 @@ func TestShadowStrategy_CondenseSession_EphemeralBranchTrailer(t *testing.T) { t.Fatalf("failed to create metadata dir: %v", err) } - //nolint:goconst // test data repeated across test functions - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript if err := os.WriteFile(filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644); err != nil { t.Fatalf("failed to write transcript: %v", err) } diff --git a/cmd/entire/cli/strategy/phase_postcommit_test.go b/cmd/entire/cli/strategy/phase_postcommit_test.go index 0d1115908..dfc3f8472 100644 --- a/cmd/entire/cli/strategy/phase_postcommit_test.go +++ b/cmd/entire/cli/strategy/phase_postcommit_test.go @@ -20,6 +20,10 @@ import ( "github.com/stretchr/testify/require" ) +const testSessionTranscript = `{"type":"human","message":{"content":"test prompt"}} +{"type":"assistant","message":{"content":"test response"}} +` + // TestPostCommit_ActiveSession_NoCondensation verifies that PostCommit on an // ACTIVE session transitions to ACTIVE_COMMITTED without condensing. // The shadow branch must be preserved because the session is still active. @@ -1256,9 +1260,7 @@ func setupSessionWithFileChange(t *testing.T, s *ManualCommitStrategy, _ *git.Re metadataDirAbs := filepath.Join(dir, metadataDir) require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755)) - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript require.NoError(t, os.WriteFile( filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644)) @@ -1288,9 +1290,7 @@ func setupSessionWithCheckpoint(t *testing.T, s *ManualCommitStrategy, _ *git.Re metadataDirAbs := filepath.Join(dir, metadataDir) require.NoError(t, os.MkdirAll(metadataDirAbs, 0o755)) - transcript := `{"type":"human","message":{"content":"test prompt"}} -{"type":"assistant","message":{"content":"test response"}} -` + transcript := testSessionTranscript require.NoError(t, os.WriteFile( filepath.Join(metadataDirAbs, paths.TranscriptFileName), []byte(transcript), 0o644)) From 4b8748fe51fa69f8b148d1a140df2905fe79bf66 Mon Sep 17 00:00:00 2001 From: Andrei David Date: Sat, 14 Feb 2026 11:38:04 +0000 Subject: [PATCH 2/5] fix(redact): use JSON-aware redaction for committed metadata --- cmd/entire/cli/checkpoint/checkpoint_test.go | 87 ++++++++++++++++++++ cmd/entire/cli/checkpoint/committed.go | 10 ++- redact/redact.go | 42 ++++++++++ redact/redact_test.go | 27 ++++++ 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index 643fcf0b8..b9b495e03 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -2951,6 +2951,93 @@ func TestWriteCommitted_UpdateSummary_RedactsSecrets(t *testing.T) { } } +func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef6") + + sessionID := "session_" + highEntropySecret + toolUseID := "toolu_" + highEntropySecret + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + Summary: &Summary{ + Intent: highEntropySecret, + }, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.SessionID != sessionID { + t.Errorf("session metadata session_id = %q, want %q", metadata.SessionID, sessionID) + } + if metadata.ToolUseID != toolUseID { + t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) + } + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + +func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef7") + + sessionID := "session_" + highEntropySecret + toolUseID := "toolu_" + highEntropySecret + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + if err := store.UpdateSummary(context.Background(), checkpointID, &Summary{Intent: highEntropySecret}); err != nil { + t.Fatalf("UpdateSummary() error = %v", err) + } + + metadata := readLatestSessionMetadata(t, repo, checkpointID) + if metadata.SessionID != sessionID { + t.Errorf("session metadata session_id = %q, want %q", metadata.SessionID, sessionID) + } + if metadata.ToolUseID != toolUseID { + t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) + } + if metadata.Summary == nil { + t.Fatal("expected summary to be present in session metadata") + } + if strings.Contains(metadata.Summary.Intent, highEntropySecret) { + t.Error("session metadata summary intent should not contain the secret after redaction") + } + if !strings.Contains(metadata.Summary.Intent, "REDACTED") { + t.Error("session metadata summary intent should contain REDACTED placeholder") + } +} + func TestWriteTemporaryTask_MetadataFiles_RedactSecrets(t *testing.T) { tempDir := t.TempDir() diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index fd6d75ad1..b480ed84d 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -355,7 +355,10 @@ func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessio if err != nil { return filePaths, fmt.Errorf("failed to marshal session metadata: %w", err) } - metadataJSON = redact.Bytes(metadataJSON) + metadataJSON, err = redact.JSONBytes(metadataJSON) + if err != nil { + return filePaths, fmt.Errorf("failed to redact metadata json: %w", err) + } metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return filePaths, err @@ -977,7 +980,10 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint if err != nil { return fmt.Errorf("failed to marshal metadata: %w", err) } - metadataJSON = redact.Bytes(metadataJSON) + metadataJSON, err = redact.JSONBytes(metadataJSON) + if err != nil { + return fmt.Errorf("failed to redact metadata json: %w", err) + } metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return fmt.Errorf("failed to create metadata blob: %w", err) diff --git a/redact/redact.go b/redact/redact.go index f9212b384..822bd0919 100644 --- a/redact/redact.go +++ b/redact/redact.go @@ -128,6 +128,20 @@ func JSONLBytes(b []byte) ([]byte, error) { return []byte(redacted), nil } +// JSONBytes is a convenience wrapper around JSONContent for []byte content. +// JSON content is redacted with JSON-aware field skipping (e.g. keys ending in "id"). +func JSONBytes(b []byte) ([]byte, error) { + s := string(b) + redacted, err := JSONContent(s) + if err != nil { + return nil, err + } + if redacted == s { + return b, nil + } + return []byte(redacted), nil +} + // JSONLContent parses each line as JSON to determine which string values // need redaction, then performs targeted replacements on the raw JSON bytes. // Lines with no secrets are returned unchanged, preserving original formatting. @@ -170,6 +184,34 @@ func JSONLContent(content string) (string, error) { return b.String(), nil } +// JSONContent parses a JSON string and redacts string values while skipping +// identifier fields (keys ending in "id") and non-user payload fields. +func JSONContent(content string) (string, error) { + var parsed any + if err := json.Unmarshal([]byte(content), &parsed); err != nil { + return "", err + } + + repls := collectJSONLReplacements(parsed) + if len(repls) == 0 { + return content, nil + } + + result := content + for _, r := range repls { + origJSON, err := jsonEncodeString(r[0]) + if err != nil { + return "", err + } + replJSON, err := jsonEncodeString(r[1]) + if err != nil { + return "", err + } + result = strings.ReplaceAll(result, origJSON, replJSON) + } + return result, nil +} + // collectJSONLReplacements walks a parsed JSON value and collects unique // (original, redacted) string pairs for values that need redaction. func collectJSONLReplacements(v any) [][2]string { diff --git a/redact/redact_test.go b/redact/redact_test.go index bdd896689..06c93b8f0 100644 --- a/redact/redact_test.go +++ b/redact/redact_test.go @@ -93,6 +93,33 @@ func TestJSONLContent_InvalidJSONLine(t *testing.T) { } } +func TestJSONBytes_PreservesIDFields(t *testing.T) { + idFieldSecret := highEntropySecret + "-session-id" + intentSecret := highEntropySecret + input := `{"session_id":"` + idFieldSecret + `","intent":"` + intentSecret + `","agent_id":"` + idFieldSecret + `"}` + result, err := JSONBytes([]byte(input)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + // If this field were not considered ID-like, it would be redacted. + if String(idFieldSecret) == idFieldSecret { + t.Fatal("id field secret should be redacted when not excluded") + } + + got := string(result) + if got != `{"session_id":"`+idFieldSecret+`","intent":"REDACTED","agent_id":"`+idFieldSecret+`"}` { + t.Errorf("got %q, want %q", got, `{"session_id":"`+idFieldSecret+`","intent":"REDACTED","agent_id":"`+idFieldSecret+`"}`) + } +} + +func TestJSONBytes_InvalidJSON(t *testing.T) { + _, err := JSONBytes([]byte(`{"session":"bad"`)) + if err == nil { + t.Fatal("expected error for invalid JSON content") + } +} + func TestCollectJSONLReplacements_Succeeds(t *testing.T) { obj := map[string]any{ "content": "token=" + highEntropySecret, From 96333286c8f756d2aae61d7f875269a91fd301ca Mon Sep 17 00:00:00 2001 From: Andrei David Date: Sat, 14 Feb 2026 11:58:12 +0000 Subject: [PATCH 3/5] fix(redact): structurally redact JSON and tighten traversal checks --- cmd/entire/cli/checkpoint/checkpoint_test.go | 33 +++++++++++ cmd/entire/cli/checkpoint/temporary.go | 3 +- redact/redact.go | 61 +++++++++++++++----- redact/redact_test.go | 39 ++++++++++++- 4 files changed, 119 insertions(+), 17 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index b9b495e03..e678de14e 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -97,6 +97,39 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { } } +func TestAddDirectoryToEntriesWithAbsPath_AllowsLeadingDotsInFileName(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + entries := make(map[string]object.TreeEntry) + + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + // Filename that begins with ".." but is still inside the directory. + dotFile := filepath.Join(metadataDir, "..notes.txt") + if err := os.WriteFile(dotFile, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write file with leading dots: %v", err) + } + + relPath := filepath.ToSlash(filepath.Join(paths.EntireMetadataDir, "test-session")) + err = addDirectoryToEntriesWithAbsPath(repo, metadataDir, relPath, entries) + if err != nil { + t.Fatalf("addDirectoryToEntriesWithAbsPath() error = %v", err) + } + + expectedPath := filepath.ToSlash(filepath.Join(relPath, "..notes.txt")) + if _, ok := entries[expectedPath]; !ok { + t.Errorf("expected file with leading dots to be included: %s", expectedPath) + } +} + // TestWriteCommitted_AgentField verifies that the Agent field is written // to both metadata.json and the commit message trailer. func TestWriteCommitted_AgentField(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 8654493d4..fc012e204 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -907,7 +907,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR if err != nil { return fmt.Errorf("failed to get relative path for %s: %w", path, err) } - if strings.HasPrefix(relWithinDir, "..") { + relWithinDir = filepath.Clean(relWithinDir) + if relWithinDir == ".." || strings.HasPrefix(relWithinDir, ".."+string(filepath.Separator)) { return fmt.Errorf("path traversal detected: %s", relWithinDir) } diff --git a/redact/redact.go b/redact/redact.go index 822bd0919..dd6c683f9 100644 --- a/redact/redact.go +++ b/redact/redact.go @@ -5,6 +5,7 @@ import ( "encoding/json" "fmt" "math" + "reflect" "regexp" "sort" "strings" @@ -192,29 +193,57 @@ func JSONContent(content string) (string, error) { return "", err } - repls := collectJSONLReplacements(parsed) - if len(repls) == 0 { + redacted := redactJSONValue(parsed) + if reflect.DeepEqual(parsed, redacted) { return content, nil } - result := content - for _, r := range repls { - origJSON, err := jsonEncodeString(r[0]) - if err != nil { - return "", err + redactedJSON, err := json.Marshal(redacted) + if err != nil { + return "", err + } + return string(redactedJSON), nil +} + +// redactJSONValue walks parsed JSON and redacts string values while preserving +// skipped keys (e.g. ending in "id") and non-user payload fields. +func redactJSONValue(v any) any { + switch val := v.(type) { + case map[string]any: + if shouldSkipJSONLObject(val) { + return val } - replJSON, err := jsonEncodeString(r[1]) - if err != nil { - return "", err + out := make(map[string]any, len(val)) + for key, child := range val { + if shouldSkipJSONField(key) { + out[key] = child + continue + } + out[key] = redactJSONValue(child) + } + return out + case []any: + out := make([]any, len(val)) + for i, child := range val { + out[i] = redactJSONValue(child) } - result = strings.ReplaceAll(result, origJSON, replJSON) + return out + case string: + return String(val) + default: + return val } - return result, nil } // collectJSONLReplacements walks a parsed JSON value and collects unique // (original, redacted) string pairs for values that need redaction. func collectJSONLReplacements(v any) [][2]string { + return collectJSONReplacements(v) +} + +// collectJSONReplacements walks a parsed JSON value and collects unique +// (original, redacted) string pairs for values that need redaction. +func collectJSONReplacements(v any) [][2]string { seen := make(map[string]bool) var repls [][2]string var walk func(v any) @@ -225,7 +254,7 @@ func collectJSONLReplacements(v any) [][2]string { return } for k, child := range val { - if shouldSkipJSONLField(k) { + if shouldSkipJSONField(k) { continue } walk(child) @@ -249,6 +278,12 @@ func collectJSONLReplacements(v any) [][2]string { // shouldSkipJSONLField returns true if a JSON key should be excluded from scanning/redaction. // Skips "signature" (exact) and any key ending in "id" (case-insensitive). func shouldSkipJSONLField(key string) bool { + return shouldSkipJSONField(key) +} + +// shouldSkipJSONField returns true if a JSON key should be excluded from scanning/redaction. +// Skips "signature" (exact) and any key ending in "id" (case-insensitive). +func shouldSkipJSONField(key string) bool { if key == "signature" { return true } diff --git a/redact/redact_test.go b/redact/redact_test.go index 06c93b8f0..ef965d6fd 100644 --- a/redact/redact_test.go +++ b/redact/redact_test.go @@ -2,6 +2,7 @@ package redact import ( "bytes" + "encoding/json" "slices" "testing" ) @@ -107,9 +108,41 @@ func TestJSONBytes_PreservesIDFields(t *testing.T) { t.Fatal("id field secret should be redacted when not excluded") } - got := string(result) - if got != `{"session_id":"`+idFieldSecret+`","intent":"REDACTED","agent_id":"`+idFieldSecret+`"}` { - t.Errorf("got %q, want %q", got, `{"session_id":"`+idFieldSecret+`","intent":"REDACTED","agent_id":"`+idFieldSecret+`"}`) + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + if got["session_id"] != idFieldSecret { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != "REDACTED" { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } + if got["agent_id"] != idFieldSecret { + t.Errorf("agent_id should be preserved, got %q", got["agent_id"]) + } +} + +func TestJSONBytes_DuplicateIDAndPayloadValues(t *testing.T) { + dup := highEntropySecret + input := `{"session_id":"` + dup + `","intent":"` + dup + `","tool":"` + dup + `"}` + result, err := JSONBytes([]byte(input)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + + if got["session_id"] != dup { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != "REDACTED" { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } + if got["tool"] != "REDACTED" { + t.Errorf("tool should be redacted, got %q", got["tool"]) } } From e77525b5840bcf0f0c2ee937a6726ce3ca3b6b88 Mon Sep 17 00:00:00 2001 From: Andrei David Date: Sat, 14 Feb 2026 14:38:12 +0000 Subject: [PATCH 4/5] fix(checkpoint): scope metadata redaction and preserve JSON formatting --- cmd/entire/cli/checkpoint/checkpoint_test.go | 84 ++++++++++++++++---- cmd/entire/cli/checkpoint/committed.go | 54 ++++++++++--- cmd/entire/cli/checkpoint/temporary.go | 3 +- redact/redact.go | 59 ++++++++++++-- redact/redact_test.go | 51 +++++++++++- 5 files changed, 217 insertions(+), 34 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index e678de14e..f33dd5261 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -130,6 +130,42 @@ func TestAddDirectoryToEntriesWithAbsPath_AllowsLeadingDotsInFileName(t *testing } } +func TestAddDirectoryToEntriesWithAbsPath_NormalizesTreePathSeparators(t *testing.T) { + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + entries := make(map[string]object.TreeEntry) + + metadataDir := filepath.Join(tempDir, ".entire", "metadata", "test-session") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + filePath := filepath.Join(metadataDir, "notes.txt") + if err := os.WriteFile(filePath, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write notes.txt: %v", err) + } + + // Simulate a windows-style relative path passed into tree construction. + relPath := `checkpoint\metadata\test-session` + if err := addDirectoryToEntriesWithAbsPath(repo, metadataDir, relPath, entries); err != nil { + t.Fatalf("addDirectoryToEntriesWithAbsPath() error = %v", err) + } + + expectedPath := "checkpoint/metadata/test-session/notes.txt" + if _, ok := entries[expectedPath]; !ok { + t.Fatalf("expected normalized path %q in entries", expectedPath) + } + for entryPath := range entries { + if strings.Contains(entryPath, `\`) { + t.Fatalf("entry path should use '/' separators, got %q", entryPath) + } + } +} + // TestWriteCommitted_AgentField verifies that the Agent field is written // to both metadata.json and the commit message trailer. func TestWriteCommitted_AgentField(t *testing.T) { @@ -2991,14 +3027,18 @@ func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { sessionID := "session_" + highEntropySecret toolUseID := "toolu_" + highEntropySecret + transcriptIdentifier := "message_" + highEntropySecret + transcriptPath := ".claude/projects/" + highEntropySecret + "/transcript.jsonl" err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: sessionID, - ToolUseID: toolUseID, - Strategy: "manual-commit", - Transcript: []byte(`{"msg":"safe"}`), - CheckpointsCount: 1, + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + TranscriptIdentifierAtStart: transcriptIdentifier, + SessionTranscriptPath: transcriptPath, Summary: &Summary{ Intent: highEntropySecret, }, @@ -3016,6 +3056,12 @@ func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { if metadata.ToolUseID != toolUseID { t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) } + if metadata.TranscriptIdentifierAtStart != transcriptIdentifier { + t.Errorf("session metadata transcript_identifier_at_start = %q, want %q", metadata.TranscriptIdentifierAtStart, transcriptIdentifier) + } + if metadata.TranscriptPath != transcriptPath { + t.Errorf("session metadata transcript_path = %q, want %q", metadata.TranscriptPath, transcriptPath) + } if metadata.Summary == nil { t.Fatal("expected summary to be present in session metadata") } @@ -3034,16 +3080,20 @@ func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { sessionID := "session_" + highEntropySecret toolUseID := "toolu_" + highEntropySecret + transcriptIdentifier := "message_" + highEntropySecret + transcriptPath := ".claude/projects/" + highEntropySecret + "/transcript.jsonl" err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ - CheckpointID: checkpointID, - SessionID: sessionID, - ToolUseID: toolUseID, - Strategy: "manual-commit", - Transcript: []byte(`{"msg":"safe"}`), - CheckpointsCount: 1, - AuthorName: "Test Author", - AuthorEmail: "test@example.com", + CheckpointID: checkpointID, + SessionID: sessionID, + ToolUseID: toolUseID, + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + TranscriptIdentifierAtStart: transcriptIdentifier, + SessionTranscriptPath: transcriptPath, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", }) if err != nil { t.Fatalf("WriteCommitted() error = %v", err) @@ -3060,6 +3110,12 @@ func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { if metadata.ToolUseID != toolUseID { t.Errorf("session metadata tool_use_id = %q, want %q", metadata.ToolUseID, toolUseID) } + if metadata.TranscriptIdentifierAtStart != transcriptIdentifier { + t.Errorf("session metadata transcript_identifier_at_start = %q, want %q", metadata.TranscriptIdentifierAtStart, transcriptIdentifier) + } + if metadata.TranscriptPath != transcriptPath { + t.Errorf("session metadata transcript_path = %q, want %q", metadata.TranscriptPath, transcriptPath) + } if metadata.Summary == nil { t.Fatal("expected summary to be present in session metadata") } diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index b480ed84d..7377a756e 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -346,7 +346,7 @@ func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessio TranscriptLinesAtStart: opts.CheckpointTranscriptStart, // Deprecated: kept for backward compat TokenUsage: opts.TokenUsage, InitialAttribution: opts.InitialAttribution, - Summary: opts.Summary, + Summary: redactSummary(opts.Summary), CLIVersion: buildinfo.Version, TranscriptPath: opts.SessionTranscriptPath, } @@ -355,10 +355,6 @@ func (s *GitStore) writeSessionToSubdirectory(opts WriteCommittedOptions, sessio if err != nil { return filePaths, fmt.Errorf("failed to marshal session metadata: %w", err) } - metadataJSON, err = redact.JSONBytes(metadataJSON) - if err != nil { - return filePaths, fmt.Errorf("failed to redact metadata json: %w", err) - } metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return filePaths, err @@ -973,17 +969,13 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint } // Update the summary - existingMetadata.Summary = summary + existingMetadata.Summary = redactSummary(summary) // Write updated session metadata metadataJSON, err := jsonutil.MarshalIndentWithNewline(existingMetadata, "", " ") if err != nil { return fmt.Errorf("failed to marshal metadata: %w", err) } - metadataJSON, err = redact.JSONBytes(metadataJSON) - if err != nil { - return fmt.Errorf("failed to redact metadata json: %w", err) - } metadataHash, err := CreateBlobFromContent(s.repo, metadataJSON) if err != nil { return fmt.Errorf("failed to create metadata blob: %w", err) @@ -1016,6 +1008,48 @@ func (s *GitStore) UpdateSummary(ctx context.Context, checkpointID id.Checkpoint return nil } +func redactSummary(summary *Summary) *Summary { + if summary == nil { + return nil + } + + redacted := &Summary{ + Intent: redact.String(summary.Intent), + Outcome: redact.String(summary.Outcome), + Learnings: LearningsSummary{ + Repo: redactStringSlice(summary.Learnings.Repo), + Workflow: redactStringSlice(summary.Learnings.Workflow), + }, + Friction: redactStringSlice(summary.Friction), + OpenItems: redactStringSlice(summary.OpenItems), + } + + if summary.Learnings.Code != nil { + redacted.Learnings.Code = make([]CodeLearning, len(summary.Learnings.Code)) + for i, learning := range summary.Learnings.Code { + redacted.Learnings.Code[i] = CodeLearning{ + Path: redact.String(learning.Path), + Line: learning.Line, + EndLine: learning.EndLine, + Finding: redact.String(learning.Finding), + } + } + } + + return redacted +} + +func redactStringSlice(values []string) []string { + if values == nil { + return nil + } + redacted := make([]string, len(values)) + for i, value := range values { + redacted[i] = redact.String(value) + } + return redacted +} + // ensureSessionsBranch ensures the entire/checkpoints/v1 branch exists. func (s *GitStore) ensureSessionsBranch() error { refName := plumbing.NewBranchReferenceName(paths.MetadataBranchName) diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index fc012e204..89dcba6ac 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -912,7 +912,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR return fmt.Errorf("path traversal detected: %s", relWithinDir) } - treePath := filepath.Join(dirPathRel, relWithinDir) + treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) + treePath = strings.ReplaceAll(treePath, `\`, "/") blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) diff --git a/redact/redact.go b/redact/redact.go index dd6c683f9..b4a613c1e 100644 --- a/redact/redact.go +++ b/redact/redact.go @@ -190,7 +190,7 @@ func JSONLContent(content string) (string, error) { func JSONContent(content string) (string, error) { var parsed any if err := json.Unmarshal([]byte(content), &parsed); err != nil { - return "", err + return "", fmt.Errorf("unmarshal JSON content: %w", err) } redacted := redactJSONValue(parsed) @@ -198,11 +198,7 @@ func JSONContent(content string) (string, error) { return content, nil } - redactedJSON, err := json.Marshal(redacted) - if err != nil { - return "", err - } - return string(redactedJSON), nil + return marshalJSONPreservingFormatting(redacted, content) } // redactJSONValue walks parsed JSON and redacts string values while preserving @@ -235,6 +231,57 @@ func redactJSONValue(v any) any { } } +// marshalJSONPreservingFormatting marshals v while preserving basic formatting +// characteristics from original (indentation and trailing newline). +func marshalJSONPreservingFormatting(v any, original string) (string, error) { + // Preserve compact style for single-line JSON. + if !strings.Contains(original, "\n") { + out, err := json.Marshal(v) + if err != nil { + return "", fmt.Errorf("marshal JSON content: %w", err) + } + if strings.HasSuffix(original, "\n") { + out = append(out, '\n') + } + return string(out), nil + } + + var buf bytes.Buffer + enc := json.NewEncoder(&buf) + indent := detectJSONIndent(original) + if indent == "" { + indent = " " + } + enc.SetIndent("", indent) + if err := enc.Encode(v); err != nil { + return "", fmt.Errorf("encode formatted JSON content: %w", err) + } + + out := buf.String() + if !strings.HasSuffix(original, "\n") { + out = strings.TrimSuffix(out, "\n") + } + return out, nil +} + +// detectJSONIndent returns the leading indentation used by the first indented +// non-empty line in content. +func detectJSONIndent(content string) string { + lines := strings.Split(content, "\n") + for i := 1; i < len(lines); i++ { + line := lines[i] + if strings.TrimSpace(line) == "" { + continue + } + trimmed := strings.TrimLeft(line, " \t") + if len(trimmed) == len(line) { + continue + } + return line[:len(line)-len(trimmed)] + } + return "" +} + // collectJSONLReplacements walks a parsed JSON value and collects unique // (original, redacted) string pairs for values that need redaction. func collectJSONLReplacements(v any) [][2]string { diff --git a/redact/redact_test.go b/redact/redact_test.go index ef965d6fd..191d10a73 100644 --- a/redact/redact_test.go +++ b/redact/redact_test.go @@ -4,11 +4,13 @@ import ( "bytes" "encoding/json" "slices" + "strings" "testing" ) // highEntropySecret is a string with Shannon entropy > 4.5 that will trigger redaction. const highEntropySecret = "sk-ant-api03-xK9mZ2vL8nQ5rT1wY4bC7dF0gH3jE6pA" +const redactedPlaceholder = "REDACTED" func TestBytes_NoSecrets(t *testing.T) { input := []byte("hello world, this is normal text") @@ -115,7 +117,7 @@ func TestJSONBytes_PreservesIDFields(t *testing.T) { if got["session_id"] != idFieldSecret { t.Errorf("session_id should be preserved, got %q", got["session_id"]) } - if got["intent"] != "REDACTED" { + if got["intent"] != redactedPlaceholder { t.Errorf("intent should be redacted, got %q", got["intent"]) } if got["agent_id"] != idFieldSecret { @@ -138,10 +140,10 @@ func TestJSONBytes_DuplicateIDAndPayloadValues(t *testing.T) { if got["session_id"] != dup { t.Errorf("session_id should be preserved, got %q", got["session_id"]) } - if got["intent"] != "REDACTED" { + if got["intent"] != redactedPlaceholder { t.Errorf("intent should be redacted, got %q", got["intent"]) } - if got["tool"] != "REDACTED" { + if got["tool"] != redactedPlaceholder { t.Errorf("tool should be redacted, got %q", got["tool"]) } } @@ -153,6 +155,49 @@ func TestJSONBytes_InvalidJSON(t *testing.T) { } } +func TestJSONBytes_PreservesIndentedFormatting(t *testing.T) { + input := []byte("{\n \"session_id\": \"sess-123\",\n \"intent\": \"" + highEntropySecret + "\"\n}\n") + + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.HasSuffix(string(result), "\n") { + t.Fatalf("expected trailing newline to be preserved, got %q", result) + } + if !strings.Contains(string(result), "\n \"") { + t.Fatalf("expected indented JSON formatting to be preserved, got %q", result) + } + + var got map[string]any + if err := json.Unmarshal(result, &got); err != nil { + t.Fatalf("unexpected unmarshal error: %v", err) + } + if got["session_id"] != "sess-123" { + t.Errorf("session_id should be preserved, got %q", got["session_id"]) + } + if got["intent"] != redactedPlaceholder { + t.Errorf("intent should be redacted, got %q", got["intent"]) + } +} + +func TestJSONBytes_PreservesCompactFormatting(t *testing.T) { + input := []byte("{\"intent\":\"" + highEntropySecret + "\"}") + + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if strings.Contains(string(result), "\n") { + t.Fatalf("expected compact JSON output without newline, got %q", result) + } + if !strings.Contains(string(result), "\"intent\":\"REDACTED\"") { + t.Fatalf("expected compact redacted output, got %q", result) + } +} + func TestCollectJSONLReplacements_Succeeds(t *testing.T) { obj := map[string]any{ "content": "token=" + highEntropySecret, From 338fe95b040dfa54bbf6c03feab2b9bf21743081 Mon Sep 17 00:00:00 2001 From: Andrei David Date: Sat, 14 Feb 2026 16:29:24 +0000 Subject: [PATCH 5/5] fix(checkpoint): harden committed redaction fallback and add guardrail tests Align committed metadata handling with the safer temporary-path behavior and add regression coverage for the remaining edge cases from review. Behavior fixes: - writeFinalTaskCheckpoint: when JSONL redaction fails for subagent transcript, now logs a warning and falls back to plain text redaction (redact.Bytes) instead of silently dropping content. - copyMetadataDir: normalize rel path with filepath.Clean and use strict traversal detection (".." or "../...") to avoid false positives like "..notes.txt" while still blocking escapes. - redact.marshalJSONPreservingFormatting: correctly treat compact JSON with an optional trailing newline by checking single-line content after trimming one trailing newline. Test coverage improvements: - Add committed-path regression test for malformed JSONL subagent transcript fallback redaction. - Add committed-path regression test proving copyMetadataDir accepts leading-dot filenames (e.g. "..notes.txt"). - Add strong guardrail test for redactSummary field coverage: field-count tripwires + scalar autofill + key-set checks + non-string invariants via normalized deep-equality. - Add compact-JSON-with-trailing-newline JSONBytes test. - Add t.Parallel() to safe unit tests in checkpoint/redact test files. Validation: - go test ./redact ./cmd/entire/cli/checkpoint Entire-Checkpoint: c172b811ee56 --- cmd/entire/cli/checkpoint/checkpoint_test.go | 322 +++++++++++++++++++ cmd/entire/cli/checkpoint/committed.go | 18 +- redact/redact.go | 6 +- redact/redact_test.go | 56 ++++ 4 files changed, 395 insertions(+), 7 deletions(-) diff --git a/cmd/entire/cli/checkpoint/checkpoint_test.go b/cmd/entire/cli/checkpoint/checkpoint_test.go index f33dd5261..58fbbe340 100644 --- a/cmd/entire/cli/checkpoint/checkpoint_test.go +++ b/cmd/entire/cli/checkpoint/checkpoint_test.go @@ -8,6 +8,7 @@ import ( "os" "os/exec" "path/filepath" + "reflect" "strconv" "strings" "testing" @@ -39,6 +40,8 @@ func TestCheckpointType_Values(t *testing.T) { } func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { + t.Parallel() + // Create a temp directory for the test tempDir := t.TempDir() @@ -97,7 +100,40 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) { } } +func TestCopyMetadataDir_AllowsLeadingDotsInFileName(t *testing.T) { + t.Parallel() + + tempDir := t.TempDir() + + repo, err := git.PlainInit(tempDir, false) + if err != nil { + t.Fatalf("failed to init git repo: %v", err) + } + + metadataDir := filepath.Join(tempDir, "metadata") + if err := os.MkdirAll(metadataDir, 0o755); err != nil { + t.Fatalf("failed to create metadata dir: %v", err) + } + + dotFile := filepath.Join(metadataDir, "..notes.txt") + if err := os.WriteFile(dotFile, []byte("ok"), 0o644); err != nil { + t.Fatalf("failed to write file with leading dots: %v", err) + } + + store := NewGitStore(repo) + entries := make(map[string]object.TreeEntry) + if err := store.copyMetadataDir(metadataDir, "checkpoint/", entries); err != nil { + t.Fatalf("copyMetadataDir() error = %v", err) + } + + if _, ok := entries["checkpoint/..notes.txt"]; !ok { + t.Error("expected file with leading dots to be included") + } +} + func TestAddDirectoryToEntriesWithAbsPath_AllowsLeadingDotsInFileName(t *testing.T) { + t.Parallel() + tempDir := t.TempDir() repo, err := git.PlainInit(tempDir, false) @@ -131,6 +167,8 @@ func TestAddDirectoryToEntriesWithAbsPath_AllowsLeadingDotsInFileName(t *testing } func TestAddDirectoryToEntriesWithAbsPath_NormalizesTreePathSeparators(t *testing.T) { + t.Parallel() + tempDir := t.TempDir() repo, err := git.PlainInit(tempDir, false) @@ -2953,6 +2991,8 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) { } func TestWriteCommitted_MetadataJSON_RedactsSecrets(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) store := NewGitStore(repo) checkpointID := id.MustCheckpointID("aabbccddeef4") @@ -2987,6 +3027,8 @@ func TestWriteCommitted_MetadataJSON_RedactsSecrets(t *testing.T) { } func TestWriteCommitted_UpdateSummary_RedactsSecrets(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) store := NewGitStore(repo) checkpointID := id.MustCheckpointID("aabbccddeef5") @@ -3021,6 +3063,8 @@ func TestWriteCommitted_UpdateSummary_RedactsSecrets(t *testing.T) { } func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) store := NewGitStore(repo) checkpointID := id.MustCheckpointID("aabbccddeef6") @@ -3074,6 +3118,8 @@ func TestWriteCommitted_MetadataJSON_PreservesMetadataIDFields(t *testing.T) { } func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { + t.Parallel() + repo, _ := setupBranchTestRepo(t) store := NewGitStore(repo) checkpointID := id.MustCheckpointID("aabbccddeef7") @@ -3127,7 +3173,55 @@ func TestWriteCommitted_UpdateSummary_PreservesMetadataIDFields(t *testing.T) { } } +func TestWriteCommitted_TaskSubagentTranscript_FallsBackToPlainTextRedaction(t *testing.T) { + t.Parallel() + + repo, _ := setupBranchTestRepo(t) + store := NewGitStore(repo) + checkpointID := id.MustCheckpointID("aabbccddeef8") + + tempDir := t.TempDir() + subagentTranscriptPath := filepath.Join(tempDir, "subagent-transcript.jsonl") + invalidJSONL := `{"role":"assistant","content":"` + highEntropySecret + `"` + if err := os.WriteFile(subagentTranscriptPath, []byte(invalidJSONL), 0o644); err != nil { + t.Fatalf("failed to write subagent transcript: %v", err) + } + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: checkpointID, + SessionID: "redact-task-subagent-fallback-session", + Strategy: "manual-commit", + Transcript: []byte(`{"msg":"safe"}`), + CheckpointsCount: 1, + IsTask: true, + ToolUseID: "tool-1", + AgentID: "agent-1", + SubagentTranscriptPath: subagentTranscriptPath, + AuthorName: "Test Author", + AuthorEmail: "test@example.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + ref, err := repo.Reference(plumbing.NewBranchReferenceName(paths.MetadataBranchName), true) + if err != nil { + t.Fatalf("failed to get metadata branch reference: %v", err) + } + + agentPath := checkpointID.Path() + "/tasks/tool-1/agent-agent-1.jsonl" + agentContent := readFileFromCheckpointCommit(t, repo, ref.Hash(), agentPath) + if strings.Contains(agentContent, highEntropySecret) { + t.Error("subagent transcript should not contain the secret after fallback redaction") + } + if !strings.Contains(agentContent, "REDACTED") { + t.Error("subagent transcript should contain REDACTED placeholder after fallback redaction") + } +} + func TestWriteTemporaryTask_MetadataFiles_RedactSecrets(t *testing.T) { + t.Parallel() + tempDir := t.TempDir() repo, err := git.PlainInit(tempDir, false) @@ -3389,6 +3483,234 @@ func TestWriteTemporary_MetadataDir_SkipsSymlinks(t *testing.T) { } } +// TestRedactSummary_CoversAllFields guards against Summary fields being added +// without updating redactSummary(). It uses struct-shape tripwires and +// reflection to verify that every string field is redacted, no field is +// silently zeroed, and non-string data is preserved unchanged. +func TestRedactSummary_CoversAllFields(t *testing.T) { + t.Parallel() + + // Tripwire: fail when struct shape changes. + assertFieldCount(t, Summary{}, 5, "Summary") + assertFieldCount(t, LearningsSummary{}, 3, "LearningsSummary") + assertFieldCount(t, CodeLearning{}, 4, "CodeLearning") + + // Auto-populate every field: strings get a secret marker, + // ints get a non-zero sentinel, slices get one element. + input := &Summary{} + fillStructFields(reflect.ValueOf(input).Elem(), highEntropySecret) + + // Sanity: every scalar in the auto-filled input must be non-zero. + // Catches fillStructFields silently skipping a new field kind. + assertAllScalarsNonZero(t, reflect.ValueOf(input), "input") + + result := redactSummary(input) + if result == nil { + t.Fatal("redactSummary returned nil for non-nil input") + } + + inputStrings := collectStringFields(reflect.ValueOf(input), "") + resultStrings := collectStringFields(reflect.ValueOf(result), "") + + // Key sets must match exactly. + for path := range inputStrings { + if _, ok := resultStrings[path]; !ok { + t.Errorf("field %s present in input but missing in result — redactSummary does not handle it", path) + } + } + for path := range resultStrings { + if _, ok := inputStrings[path]; !ok { + t.Errorf("unexpected field %s in result but not in input", path) + } + } + + // Every input string must be non-empty (auto-fill sanity check). + for path, val := range inputStrings { + if val == "" { + t.Fatalf("auto-fill bug: field %s is empty in input", path) + } + } + + // No result string may be empty (silently zeroed) or contain the raw secret. + for path, val := range resultStrings { + if val == "" { + t.Errorf("field %s was silently zeroed — redactSummary does not copy this field", path) + } + if strings.Contains(val, highEntropySecret) { + t.Errorf("field %s still contains raw secret after redaction", path) + } + } + + // Non-string invariant: only string content may differ. + // Deep-copy both, normalize all strings to a fixed token, then DeepEqual. + inputNorm := jsonRoundTripSummary(t, input) + resultNorm := jsonRoundTripSummary(t, result) + setAllStrings(reflect.ValueOf(inputNorm).Elem(), "X") + setAllStrings(reflect.ValueOf(resultNorm).Elem(), "X") + if !reflect.DeepEqual(inputNorm, resultNorm) { + t.Errorf("redactSummary altered non-string data:\n input (normalized): %+v\n result (normalized): %+v", inputNorm, resultNorm) + } +} + +func assertFieldCount(t *testing.T, v any, expected int, name string) { + t.Helper() + actual := reflect.TypeOf(v).NumField() + if actual != expected { + t.Fatalf("%s has %d fields (expected %d) — update redactSummary() and this test", + name, actual, expected) + } +} + +// fillStructFields recursively populates every field in a struct with +// non-zero values: strings → secret, bools → true, ints/uints → 7, +// floats → 7.5, pointers → allocate and recurse, slices → one element. +func fillStructFields(v reflect.Value, secret string) { + switch v.Kind() { + case reflect.Ptr: + if v.IsNil() { + v.Set(reflect.New(v.Type().Elem())) + } + fillStructFields(v.Elem(), secret) + case reflect.Struct: + for i := range v.NumField() { + fillStructFields(v.Field(i), secret) + } + case reflect.String: + v.SetString(secret) + case reflect.Bool: + v.SetBool(true) + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + v.SetInt(7) + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + v.SetUint(7) + case reflect.Float32, reflect.Float64: + v.SetFloat(7.5) + case reflect.Slice: + s := reflect.MakeSlice(v.Type(), 1, 1) + fillStructFields(s.Index(0), secret) + v.Set(s) + } +} + +// assertAllScalarsNonZero walks a struct and fatals if any scalar field +// (string, bool, int, uint, float) is at its zero value. This guards +// against fillStructFields silently skipping a new field kind. +func assertAllScalarsNonZero(t *testing.T, v reflect.Value, prefix string) { + t.Helper() + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return + } + v = v.Elem() + } + switch v.Kind() { + case reflect.Struct: + tp := v.Type() + for i := range tp.NumField() { + name := tp.Field(i).Name + p := name + if prefix != "" { + p = prefix + "." + name + } + assertAllScalarsNonZero(t, v.Field(i), p) + } + case reflect.Slice: + for i := range v.Len() { + assertAllScalarsNonZero(t, v.Index(i), fmt.Sprintf("%s[%d]", prefix, i)) + } + case reflect.String: + if v.String() == "" { + t.Fatalf("auto-fill bug: %s is empty string", prefix) + } + case reflect.Bool: + if !v.Bool() { + t.Fatalf("auto-fill bug: %s is false", prefix) + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + if v.Int() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + if v.Uint() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + case reflect.Float32, reflect.Float64: + if v.Float() == 0 { + t.Fatalf("auto-fill bug: %s is zero", prefix) + } + } +} + +// collectStringFields recursively extracts all string values keyed by +// dotted path (e.g. "Learnings.Code[0].Path"). +func collectStringFields(v reflect.Value, prefix string) map[string]string { + out := make(map[string]string) + if v.Kind() == reflect.Ptr { + if v.IsNil() { + return out + } + v = v.Elem() + } + switch v.Kind() { + case reflect.Struct: + t := v.Type() + for i := range t.NumField() { + name := t.Field(i).Name + p := name + if prefix != "" { + p = prefix + "." + name + } + for k, val := range collectStringFields(v.Field(i), p) { + out[k] = val + } + } + case reflect.Slice: + for i := range v.Len() { + p := fmt.Sprintf("%s[%d]", prefix, i) + for k, val := range collectStringFields(v.Index(i), p) { + out[k] = val + } + } + case reflect.String: + out[prefix] = v.String() + } + return out +} + +// setAllStrings recursively sets every string field/element to token. +func setAllStrings(v reflect.Value, token string) { + switch v.Kind() { + case reflect.Ptr: + if !v.IsNil() { + setAllStrings(v.Elem(), token) + } + case reflect.Struct: + for i := range v.NumField() { + setAllStrings(v.Field(i), token) + } + case reflect.Slice: + for i := range v.Len() { + setAllStrings(v.Index(i), token) + } + case reflect.String: + v.SetString(token) + } +} + +// jsonRoundTripSummary returns a deep copy of s via JSON round-trip. +func jsonRoundTripSummary(t *testing.T, s *Summary) *Summary { + t.Helper() + data, err := json.Marshal(s) + if err != nil { + t.Fatalf("failed to marshal Summary for deep copy: %v", err) + } + var cp Summary + if err := json.Unmarshal(data, &cp); err != nil { + t.Fatalf("failed to unmarshal Summary for deep copy: %v", err) + } + return &cp +} + // TestWriteCommitted_CLIVersionField verifies that buildinfo.Version is written // to both the root CheckpointSummary and session-level CommittedMetadata. func TestWriteCommitted_CLIVersionField(t *testing.T) { diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index 7377a756e..aeae5d0b0 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -204,10 +204,17 @@ func (s *GitStore) writeFinalTaskCheckpoint(opts WriteCommittedOptions, taskPath if opts.SubagentTranscriptPath != "" && opts.AgentID != "" { agentContent, readErr := os.ReadFile(opts.SubagentTranscriptPath) if readErr == nil { - agentContent, readErr = redact.JSONLBytes(agentContent) - } - if readErr == nil { - agentBlobHash, agentBlobErr := CreateBlobFromContent(s.repo, agentContent) + redactedAgentContent, redactErr := redact.JSONLBytes(agentContent) + if redactErr != nil { + logging.Warn(context.Background(), "failed to redact subagent transcript as JSONL, falling back to plain text redaction", + slog.String("error", redactErr.Error()), + slog.String("session_id", opts.SessionID), + slog.String("agent_id", opts.AgentID), + ) + redactedAgentContent = redact.Bytes(agentContent) + } + + agentBlobHash, agentBlobErr := CreateBlobFromContent(s.repo, redactedAgentContent) if agentBlobErr == nil { agentPath := taskPath + "agent-" + opts.AgentID + ".jsonl" entries[agentPath] = object.TreeEntry{ @@ -1155,9 +1162,10 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str if err != nil { return fmt.Errorf("failed to get relative path for %s: %w", path, err) } + relPath = filepath.Clean(relPath) // Prevent path traversal via symlinks pointing outside the metadata dir - if strings.HasPrefix(relPath, "..") { + if relPath == ".." || strings.HasPrefix(relPath, ".."+string(filepath.Separator)) { return fmt.Errorf("path traversal detected: %s", relPath) } diff --git a/redact/redact.go b/redact/redact.go index b4a613c1e..815e7e272 100644 --- a/redact/redact.go +++ b/redact/redact.go @@ -234,8 +234,10 @@ func redactJSONValue(v any) any { // marshalJSONPreservingFormatting marshals v while preserving basic formatting // characteristics from original (indentation and trailing newline). func marshalJSONPreservingFormatting(v any, original string) (string, error) { - // Preserve compact style for single-line JSON. - if !strings.Contains(original, "\n") { + trimmedOriginal := strings.TrimSuffix(original, "\n") + // Preserve compact style when JSON content itself is a single line, + // while still preserving an optional trailing newline. + if !strings.Contains(trimmedOriginal, "\n") { out, err := json.Marshal(v) if err != nil { return "", fmt.Errorf("marshal JSON content: %w", err) diff --git a/redact/redact_test.go b/redact/redact_test.go index 191d10a73..c5fadb7f5 100644 --- a/redact/redact_test.go +++ b/redact/redact_test.go @@ -13,6 +13,8 @@ const highEntropySecret = "sk-ant-api03-xK9mZ2vL8nQ5rT1wY4bC7dF0gH3jE6pA" const redactedPlaceholder = "REDACTED" func TestBytes_NoSecrets(t *testing.T) { + t.Parallel() + input := []byte("hello world, this is normal text") result := Bytes(input) if string(result) != string(input) { @@ -25,6 +27,8 @@ func TestBytes_NoSecrets(t *testing.T) { } func TestBytes_WithSecret(t *testing.T) { + t.Parallel() + input := []byte("my key is " + highEntropySecret + " ok") result := Bytes(input) expected := []byte("my key is REDACTED ok") @@ -34,6 +38,8 @@ func TestBytes_WithSecret(t *testing.T) { } func TestJSONLBytes_NoSecrets(t *testing.T) { + t.Parallel() + input := []byte(`{"type":"text","content":"hello"}`) result, err := JSONLBytes(input) if err != nil { @@ -48,6 +54,8 @@ func TestJSONLBytes_NoSecrets(t *testing.T) { } func TestJSONLBytes_WithSecret(t *testing.T) { + t.Parallel() + input := []byte(`{"type":"text","content":"key=` + highEntropySecret + `"}`) result, err := JSONLBytes(input) if err != nil { @@ -60,6 +68,8 @@ func TestJSONLBytes_WithSecret(t *testing.T) { } func TestJSONLContent_TopLevelArray(t *testing.T) { + t.Parallel() + // Top-level JSON arrays are valid JSONL and should be redacted. input := `["` + highEntropySecret + `","normal text"]` result, err := JSONLContent(input) @@ -73,6 +83,8 @@ func TestJSONLContent_TopLevelArray(t *testing.T) { } func TestJSONLContent_TopLevelArrayNoSecrets(t *testing.T) { + t.Parallel() + input := `["hello","world"]` result, err := JSONLContent(input) if err != nil { @@ -84,6 +96,8 @@ func TestJSONLContent_TopLevelArrayNoSecrets(t *testing.T) { } func TestJSONLContent_InvalidJSONLine(t *testing.T) { + t.Parallel() + // Lines that aren't valid JSON should be processed with normal string redaction. input := `{"type":"text", "invalid ` + highEntropySecret + " json" result, err := JSONLContent(input) @@ -97,6 +111,8 @@ func TestJSONLContent_InvalidJSONLine(t *testing.T) { } func TestJSONBytes_PreservesIDFields(t *testing.T) { + t.Parallel() + idFieldSecret := highEntropySecret + "-session-id" intentSecret := highEntropySecret input := `{"session_id":"` + idFieldSecret + `","intent":"` + intentSecret + `","agent_id":"` + idFieldSecret + `"}` @@ -126,6 +142,8 @@ func TestJSONBytes_PreservesIDFields(t *testing.T) { } func TestJSONBytes_DuplicateIDAndPayloadValues(t *testing.T) { + t.Parallel() + dup := highEntropySecret input := `{"session_id":"` + dup + `","intent":"` + dup + `","tool":"` + dup + `"}` result, err := JSONBytes([]byte(input)) @@ -149,6 +167,8 @@ func TestJSONBytes_DuplicateIDAndPayloadValues(t *testing.T) { } func TestJSONBytes_InvalidJSON(t *testing.T) { + t.Parallel() + _, err := JSONBytes([]byte(`{"session":"bad"`)) if err == nil { t.Fatal("expected error for invalid JSON content") @@ -156,6 +176,8 @@ func TestJSONBytes_InvalidJSON(t *testing.T) { } func TestJSONBytes_PreservesIndentedFormatting(t *testing.T) { + t.Parallel() + input := []byte("{\n \"session_id\": \"sess-123\",\n \"intent\": \"" + highEntropySecret + "\"\n}\n") result, err := JSONBytes(input) @@ -183,6 +205,8 @@ func TestJSONBytes_PreservesIndentedFormatting(t *testing.T) { } func TestJSONBytes_PreservesCompactFormatting(t *testing.T) { + t.Parallel() + input := []byte("{\"intent\":\"" + highEntropySecret + "\"}") result, err := JSONBytes(input) @@ -198,7 +222,29 @@ func TestJSONBytes_PreservesCompactFormatting(t *testing.T) { } } +func TestJSONBytes_PreservesCompactWithTrailingNewline(t *testing.T) { + t.Parallel() + + input := []byte("{\"intent\":\"" + highEntropySecret + "\"}\n") + result, err := JSONBytes(input) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if !strings.HasSuffix(string(result), "\n") { + t.Fatalf("expected trailing newline to be preserved, got %q", result) + } + if strings.Count(string(result), "\n") != 1 { + t.Fatalf("expected compact single-line output with trailing newline, got %q", result) + } + if !strings.Contains(string(result), "\"intent\":\"REDACTED\"") { + t.Fatalf("expected compact redacted output, got %q", result) + } +} + func TestCollectJSONLReplacements_Succeeds(t *testing.T) { + t.Parallel() + obj := map[string]any{ "content": "token=" + highEntropySecret, } @@ -211,6 +257,8 @@ func TestCollectJSONLReplacements_Succeeds(t *testing.T) { } func TestShouldSkipJSONLField(t *testing.T) { + t.Parallel() + tests := []struct { key string want bool @@ -249,6 +297,8 @@ func TestShouldSkipJSONLField(t *testing.T) { } func TestShouldSkipJSONLField_RedactionBehavior(t *testing.T) { + t.Parallel() + // Verify that secrets in skipped fields are preserved (not redacted). obj := map[string]any{ "session_id": highEntropySecret, @@ -265,6 +315,8 @@ func TestShouldSkipJSONLField_RedactionBehavior(t *testing.T) { } func TestString_PatternDetection(t *testing.T) { + t.Parallel() + // These secrets have entropy below 4.5 so entropy-only detection misses them. // Gitleaks pattern matching should catch them. tests := []struct { @@ -307,6 +359,8 @@ func TestString_PatternDetection(t *testing.T) { } func TestShouldSkipJSONLObject(t *testing.T) { + t.Parallel() + tests := []struct { name string obj map[string]any @@ -354,6 +408,8 @@ func TestShouldSkipJSONLObject(t *testing.T) { } func TestShouldSkipJSONLObject_RedactionBehavior(t *testing.T) { + t.Parallel() + // Verify that secrets inside image objects are NOT redacted. obj := map[string]any{ "type": "image",