diff --git a/cmd/entire/cli/agent/codexcli/codex.go b/cmd/entire/cli/agent/codexcli/codex.go new file mode 100644 index 000000000..25deb3541 --- /dev/null +++ b/cmd/entire/cli/agent/codexcli/codex.go @@ -0,0 +1,272 @@ +// Package codexcli implements the Agent interface for OpenAI Codex CLI. +// Codex does not support lifecycle hooks, so integration is done via a wrapper +// command (entire codex exec) that captures the JSONL event stream. +package codexcli + +import ( + "bufio" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "time" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/sessionid" +) + +//nolint:gochecknoinits // Agent self-registration is the intended pattern +func init() { + agent.Register(agent.AgentNameCodex, NewCodexCLIAgent) +} + +// CodexCLIAgent implements the Agent interface for OpenAI Codex CLI. +// +//nolint:revive // CodexCLIAgent is clearer than Agent in this context +type CodexCLIAgent struct{} + +// NewCodexCLIAgent creates a new Codex CLI agent instance. +func NewCodexCLIAgent() agent.Agent { + return &CodexCLIAgent{} +} + +// Name returns the agent registry key. +func (c *CodexCLIAgent) Name() agent.AgentName { + return agent.AgentNameCodex +} + +// Type returns the agent type identifier. +func (c *CodexCLIAgent) Type() agent.AgentType { + return agent.AgentTypeCodex +} + +// Description returns a human-readable description. +func (c *CodexCLIAgent) Description() string { + return "Codex CLI - OpenAI's CLI coding assistant" +} + +// DetectPresence checks if Codex CLI is available. +// Unlike hook-based agents, Codex detection checks for the binary in PATH +// since Codex does not create per-repo configuration directories. 
+func (c *CodexCLIAgent) DetectPresence() (bool, error) { + _, err := exec.LookPath("codex") + if err != nil { + return false, nil //nolint:nilerr // binary not found is not an error + } + return true, nil +} + +// GetHookConfigPath returns empty since Codex does not use hook config files. +func (c *CodexCLIAgent) GetHookConfigPath() string { + return "" +} + +// SupportsHooks returns false. Codex does not have a lifecycle hook system. +// Integration is achieved through the wrapper command (entire codex exec). +func (c *CodexCLIAgent) SupportsHooks() bool { + return false +} + +// ParseHookInput is not used for Codex since it does not support hooks. +// Returns an error if called. +func (c *CodexCLIAgent) ParseHookInput(_ agent.HookType, _ io.Reader) (*agent.HookInput, error) { + return nil, errors.New("codex CLI does not support hooks; use 'entire codex exec' instead") +} + +// GetSessionID extracts the session ID from hook input. +func (c *CodexCLIAgent) GetSessionID(input *agent.HookInput) string { + return input.SessionID +} + +// TransformSessionID converts a Codex thread ID to an Entire session ID. +func (c *CodexCLIAgent) TransformSessionID(agentSessionID string) string { + return agentSessionID +} + +// ExtractAgentSessionID extracts the Codex thread ID from an Entire session ID. +func (c *CodexCLIAgent) ExtractAgentSessionID(entireSessionID string) string { + return sessionid.ModelSessionID(entireSessionID) +} + +// ProtectedDirs returns an empty list. Codex does not create per-repo directories. +func (c *CodexCLIAgent) ProtectedDirs() []string { return nil } + +// userHomeDir is the function used to resolve the user's home directory. +// Overridden in tests to simulate os.UserHomeDir failures. +var userHomeDir = os.UserHomeDir + +// parseEventStreamFn is the function used to parse event streams. +// Overridden in tests to simulate parse failures. 
+var parseEventStreamFn = ParseEventStream + +// CodexHome returns the Codex home directory, respecting the CODEX_HOME env var. +func CodexHome() string { + if home := os.Getenv("CODEX_HOME"); home != "" { + return home + } + homeDir, err := userHomeDir() + if err != nil { + return filepath.Join(".", ".codex") + } + return filepath.Join(homeDir, ".codex") +} + +// GetSessionDir returns the directory where Codex stores session data. +func (c *CodexCLIAgent) GetSessionDir(_ string) (string, error) { + return filepath.Join(CodexHome(), "sessions"), nil +} + +// ResolveSessionFile returns the path to a Codex session file. +func (c *CodexCLIAgent) ResolveSessionFile(sessionDir, agentSessionID string) string { + return filepath.Join(sessionDir, agentSessionID+".jsonl") +} + +// ReadSession reads a session from the Codex event stream file. +func (c *CodexCLIAgent) ReadSession(input *agent.HookInput) (*agent.AgentSession, error) { + if input.SessionRef == "" { + return nil, errors.New("session reference (transcript path) is required") + } + + data, err := os.ReadFile(input.SessionRef) + if err != nil { + return nil, fmt.Errorf("failed to read transcript: %w", err) + } + + parsed, err := parseEventStreamFn(data) + if err != nil { + return nil, fmt.Errorf("failed to parse event stream: %w", err) + } + + return &agent.AgentSession{ + SessionID: input.SessionID, + AgentName: c.Name(), + SessionRef: input.SessionRef, + StartTime: time.Now(), + NativeData: data, + ModifiedFiles: parsed.ModifiedFiles, + }, nil +} + +// WriteSession writes session data to a file. 
+func (c *CodexCLIAgent) WriteSession(session *agent.AgentSession) error { + if session == nil { + return errors.New("session is nil") + } + if session.AgentName != "" && session.AgentName != c.Name() { + return fmt.Errorf("session belongs to agent %q, not %q", session.AgentName, c.Name()) + } + if session.SessionRef == "" { + return errors.New("session reference (transcript path) is required") + } + if len(session.NativeData) == 0 { + return errors.New("session has no native data to write") + } + + if err := os.WriteFile(session.SessionRef, session.NativeData, 0o600); err != nil { + return fmt.Errorf("failed to write transcript: %w", err) + } + return nil +} + +// FormatResumeCommand returns the command to resume a Codex session. +func (c *CodexCLIAgent) FormatResumeCommand(sessionID string) string { + return "codex exec resume " + sessionID +} + +// TranscriptAnalyzer interface implementation + +// GetTranscriptPosition returns the current line count of a Codex event stream file. +func (c *CodexCLIAgent) GetTranscriptPosition(path string) (int, error) { + return GetTranscriptPosition(path) +} + +// ExtractModifiedFilesFromOffset extracts files modified since a given line number. +// For Codex (JSONL format), offset is the starting line number. +func (c *CodexCLIAgent) ExtractModifiedFilesFromOffset(path string, startOffset int) (files []string, currentPosition int, err error) { + if path == "" { + return nil, 0, nil + } + + file, openErr := os.Open(path) //nolint:gosec // path comes from controlled transcript location + if openErr != nil { + return nil, 0, fmt.Errorf("failed to open transcript file: %w", openErr) + } + defer file.Close() + + return scanModifiedFiles(file, startOffset) +} + +// scanModifiedFiles scans JSONL lines from a reader and extracts modified file paths. 
+func scanModifiedFiles(r io.Reader, startOffset int) ([]string, int, error) { + scanner := newBufferedScanner(r) + lineNum := 0 + var modifiedFiles []string + seen := make(map[string]bool) + + for scanner.Scan() { + lineNum++ + if lineNum <= startOffset { + continue + } + + var event Event + if err := json.Unmarshal(scanner.Bytes(), &event); err != nil { + continue + } + + if event.Type != EventItemCompleted || len(event.Item) == 0 { + continue + } + + var envelope ItemEnvelope + if err := json.Unmarshal(event.Item, &envelope); err != nil { + continue + } + + if envelope.Type == ItemFileChange { + var item FileChangeItem + if err := json.Unmarshal(event.Item, &item); err != nil { + continue + } + for _, change := range item.Changes { + if !seen[change.Path] { + seen[change.Path] = true + modifiedFiles = append(modifiedFiles, change.Path) + } + } + } + } + + if err := scanner.Err(); err != nil { + return nil, 0, fmt.Errorf("failed to scan transcript: %w", err) + } + + return modifiedFiles, lineNum, nil +} + +// TranscriptChunker interface implementation + +// ChunkTranscript splits a JSONL event stream at line boundaries. +func (c *CodexCLIAgent) ChunkTranscript(content []byte, maxSize int) ([][]byte, error) { + chunks, err := agent.ChunkJSONL(content, maxSize) + if err != nil { + return nil, fmt.Errorf("failed to chunk JSONL transcript: %w", err) + } + return chunks, nil +} + +// ReassembleTranscript concatenates JSONL chunks. 
+// +//nolint:unparam // error return is required by interface, kept for consistency +func (c *CodexCLIAgent) ReassembleTranscript(chunks [][]byte) ([]byte, error) { + return agent.ReassembleJSONL(chunks), nil +} + +func newBufferedScanner(r io.Reader) *bufio.Scanner { + s := bufio.NewScanner(r) + s.Buffer(make([]byte, 0, scannerBufferSize), scannerBufferSize) + return s +} diff --git a/cmd/entire/cli/agent/codexcli/codex_test.go b/cmd/entire/cli/agent/codexcli/codex_test.go new file mode 100644 index 000000000..a5e79e22f --- /dev/null +++ b/cmd/entire/cli/agent/codexcli/codex_test.go @@ -0,0 +1,556 @@ +package codexcli + +import ( + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/agent" +) + +func TestCodexCLIAgent_Interface(t *testing.T) { + t.Parallel() + + ag := NewCodexCLIAgent() + + if ag.Name() != agent.AgentNameCodex { + t.Errorf("Name() = %q, want %q", ag.Name(), agent.AgentNameCodex) + } + if ag.Type() != agent.AgentTypeCodex { + t.Errorf("Type() = %q, want %q", ag.Type(), agent.AgentTypeCodex) + } + if ag.Description() == "" { + t.Error("Description() should not be empty") + } + if ag.SupportsHooks() { + t.Error("SupportsHooks() should return false for Codex") + } + if ag.GetHookConfigPath() != "" { + t.Errorf("GetHookConfigPath() = %q, want empty", ag.GetHookConfigPath()) + } +} + +func TestCodexCLIAgent_ProtectedDirs(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + dirs := ag.ProtectedDirs() + if len(dirs) != 0 { + t.Errorf("ProtectedDirs() = %v, want empty", dirs) + } +} + +func TestCodexCLIAgent_ParseHookInput_ReturnsError(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + _, err := ag.ParseHookInput(agent.HookStop, strings.NewReader("{}")) + if err == nil { + t.Error("ParseHookInput() should return error for Codex") + } +} + +func TestCodexCLIAgent_TransformSessionID(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + id := "0199a213-81c0-7800-8aa1-bbab2a035a53" + if 
got := ag.TransformSessionID(id); got != id { + t.Errorf("TransformSessionID(%q) = %q, want identity", id, got) + } +} + +func TestCodexCLIAgent_FormatResumeCommand(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + cmd := ag.FormatResumeCommand("session-123") + if cmd != "codex exec resume session-123" { + t.Errorf("FormatResumeCommand() = %q, unexpected", cmd) + } +} + +func TestCodexHome(t *testing.T) { + t.Parallel() + + home := CodexHome() + if home == "" { + t.Error("CodexHome() should not be empty") + } + if !strings.HasSuffix(home, ".codex") { + t.Errorf("CodexHome() = %q, should end with .codex", home) + } +} + +func TestCodexCLIAgent_Registration(t *testing.T) { + t.Parallel() + + ag, err := agent.Get(agent.AgentNameCodex) + if err != nil { + t.Fatalf("agent.Get(%q) error = %v", agent.AgentNameCodex, err) + } + if ag.Name() != agent.AgentNameCodex { + t.Errorf("Name() = %q, want %q", ag.Name(), agent.AgentNameCodex) + } +} + +func TestCodexCLIAgent_ReadSession(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + path := tmpDir + "/session.jsonl" + if err := os.WriteFile(path, []byte(basicSessionJSONL), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + ag := &CodexCLIAgent{} + input := &agent.HookInput{ + SessionID: "test-session", + SessionRef: path, + } + + session, err := ag.ReadSession(input) + if err != nil { + t.Fatalf("ReadSession() error = %v", err) + } + + if session.SessionID != "test-session" { + t.Errorf("SessionID = %q, want %q", session.SessionID, "test-session") + } + if session.AgentName != agent.AgentNameCodex { + t.Errorf("AgentName = %q, want %q", session.AgentName, agent.AgentNameCodex) + } + if len(session.ModifiedFiles) != 2 { + t.Errorf("ModifiedFiles count = %d, want 2", len(session.ModifiedFiles)) + } + if len(session.NativeData) == 0 { + t.Error("NativeData should not be empty") + } +} + +func TestCodexCLIAgent_ReadSession_MissingRef(t *testing.T) { + t.Parallel() + + ag := 
&CodexCLIAgent{} + _, err := ag.ReadSession(&agent.HookInput{SessionID: "test"}) + if err == nil { + t.Error("ReadSession() should return error when SessionRef is empty") + } +} + +func TestCodexCLIAgent_WriteSession(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + tmpDir := t.TempDir() + path := tmpDir + "/test.jsonl" + + session := &agent.AgentSession{ + AgentName: agent.AgentNameCodex, + SessionRef: path, + NativeData: []byte(`{"type":"thread.started","thread_id":"test"}`), + } + + if err := ag.WriteSession(session); err != nil { + t.Fatalf("WriteSession() error = %v", err) + } + + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("failed to read written file: %v", err) + } + if string(data) != `{"type":"thread.started","thread_id":"test"}` { + t.Errorf("written data = %q, unexpected", string(data)) + } +} + +func TestCodexCLIAgent_WriteSession_WrongAgent(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + err := ag.WriteSession(&agent.AgentSession{ + AgentName: agent.AgentNameClaudeCode, + SessionRef: "/tmp/test.jsonl", + NativeData: []byte("data"), + }) + if err == nil { + t.Error("WriteSession() should return error for wrong agent") + } +} + +func TestCodexCLIAgent_GetSessionID(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + input := &agent.HookInput{SessionID: "thread-abc-123"} + if got := ag.GetSessionID(input); got != "thread-abc-123" { + t.Errorf("GetSessionID() = %q, want %q", got, "thread-abc-123") + } +} + +func TestCodexCLIAgent_ExtractAgentSessionID(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + got := ag.ExtractAgentSessionID("some-session-id") + if got == "" { + t.Error("ExtractAgentSessionID() should not return empty") + } +} + +func TestCodexCLIAgent_GetSessionDir(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + dir, err := ag.GetSessionDir("/some/repo") + if err != nil { + t.Fatalf("GetSessionDir() error = %v", err) + } + if !strings.Contains(dir, "sessions") { + 
t.Errorf("GetSessionDir() = %q, should contain 'sessions'", dir) + } +} + +func TestCodexCLIAgent_ResolveSessionFile(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + got := ag.ResolveSessionFile("/sessions", "thread-123") + want := filepath.Join("/sessions", "thread-123.jsonl") + if got != want { + t.Errorf("ResolveSessionFile() = %q, want %q", got, want) + } +} + +func TestCodexCLIAgent_ExtractModifiedFilesFromOffset(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "session.jsonl") + if err := os.WriteFile(path, []byte(basicSessionJSONL), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + ag := &CodexCLIAgent{} + + t.Run("from start", func(t *testing.T) { + t.Parallel() + files, pos, err := ag.ExtractModifiedFilesFromOffset(path, 0) + if err != nil { + t.Fatalf("ExtractModifiedFilesFromOffset() error = %v", err) + } + if len(files) != 2 { + t.Errorf("files count = %d, want 2", len(files)) + } + if pos != 8 { + t.Errorf("position = %d, want 8", pos) + } + }) + + t.Run("from offset past changes", func(t *testing.T) { + t.Parallel() + files, pos, err := ag.ExtractModifiedFilesFromOffset(path, 8) + if err != nil { + t.Fatalf("ExtractModifiedFilesFromOffset() error = %v", err) + } + if len(files) != 0 { + t.Errorf("files count = %d, want 0", len(files)) + } + if pos != 8 { + t.Errorf("position = %d, want 8", pos) + } + }) + + t.Run("empty path", func(t *testing.T) { + t.Parallel() + files, pos, err := ag.ExtractModifiedFilesFromOffset("", 0) + if err != nil { + t.Fatalf("ExtractModifiedFilesFromOffset() error = %v", err) + } + if files != nil { + t.Errorf("files = %v, want nil", files) + } + if pos != 0 { + t.Errorf("position = %d, want 0", pos) + } + }) +} + +func TestCodexCLIAgent_ChunkTranscript(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + data := []byte(basicSessionJSONL) + + chunks, err := ag.ChunkTranscript(data, 500) + if err != nil { + t.Fatalf("ChunkTranscript() error 
= %v", err) + } + if len(chunks) == 0 { + t.Error("ChunkTranscript() returned no chunks") + } + + reassembled, err := ag.ReassembleTranscript(chunks) + if err != nil { + t.Fatalf("ReassembleTranscript() error = %v", err) + } + if len(reassembled) == 0 { + t.Error("ReassembleTranscript() returned empty") + } +} + +func TestCodexCLIAgent_DetectPresence(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + // DetectPresence checks PATH for "codex" binary — result depends on environment + // but it should not return an error regardless + _, err := ag.DetectPresence() + if err != nil { + t.Errorf("DetectPresence() error = %v", err) + } +} + +func TestCodexCLIAgent_DetectPresence_NotInPath(t *testing.T) { + // Cannot use t.Parallel() — t.Setenv modifies process-global state + t.Setenv("PATH", t.TempDir()) + + ag := &CodexCLIAgent{} + found, err := ag.DetectPresence() + if err != nil { + t.Errorf("DetectPresence() error = %v", err) + } + if found { + t.Error("DetectPresence() = true, want false when codex not in PATH") + } +} + +func TestCodexHome_WithEnvVar(t *testing.T) { + // Cannot use t.Parallel() — t.Setenv modifies process-global state + t.Setenv("CODEX_HOME", "/custom/codex/home") + + home := CodexHome() + if home != "/custom/codex/home" { + t.Errorf("CodexHome() = %q, want %q", home, "/custom/codex/home") + } +} + +func TestCodexHome_UserHomeDirError(t *testing.T) { + // Cannot use t.Parallel() — modifies package-level state + t.Setenv("CODEX_HOME", "") + + orig := userHomeDir + userHomeDir = func() (string, error) { + return "", errors.New("no home dir") + } + t.Cleanup(func() { userHomeDir = orig }) + + home := CodexHome() + want := filepath.Join(".", ".codex") + if home != want { + t.Errorf("CodexHome() = %q, want %q", home, want) + } +} + +func TestCodexCLIAgent_ReadSession_FileNotFound(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + _, err := ag.ReadSession(&agent.HookInput{ + SessionID: "test", + SessionRef: 
"/nonexistent/path/session.jsonl", + }) + if err == nil { + t.Error("ReadSession() should return error for nonexistent file") + } +} + +func TestCodexCLIAgent_ReadSession_ParseError(t *testing.T) { + // Cannot use t.Parallel() — modifies package-level state + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "session.jsonl") + if err := os.WriteFile(path, []byte(`{"type":"thread.started"}`+"\n"), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + orig := parseEventStreamFn + parseEventStreamFn = func(_ []byte) (*ParsedSession, error) { + return nil, errors.New("injected parse error") + } + t.Cleanup(func() { parseEventStreamFn = orig }) + + ag := &CodexCLIAgent{} + _, err := ag.ReadSession(&agent.HookInput{ + SessionID: "test", + SessionRef: path, + }) + if err == nil { + t.Error("ReadSession() should return error when parsing fails") + } +} + +func TestCodexCLIAgent_WriteSession_Nil(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + if err := ag.WriteSession(nil); err == nil { + t.Error("WriteSession(nil) should return error") + } +} + +func TestCodexCLIAgent_WriteSession_EmptyRef(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + err := ag.WriteSession(&agent.AgentSession{ + AgentName: agent.AgentNameCodex, + NativeData: []byte("data"), + }) + if err == nil { + t.Error("WriteSession() should return error when SessionRef is empty") + } +} + +func TestCodexCLIAgent_WriteSession_EmptyNativeData(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + err := ag.WriteSession(&agent.AgentSession{ + AgentName: agent.AgentNameCodex, + SessionRef: "/tmp/test.jsonl", + }) + if err == nil { + t.Error("WriteSession() should return error when NativeData is empty") + } +} + +func TestCodexCLIAgent_WriteSession_InvalidPath(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + err := ag.WriteSession(&agent.AgentSession{ + AgentName: agent.AgentNameCodex, + SessionRef: "/nonexistent/deeply/nested/dir/session.jsonl", + 
NativeData: []byte("data"), + }) + if err == nil { + t.Error("WriteSession() should return error for invalid path") + } +} + +func TestCodexCLIAgent_GetTranscriptPosition_Method(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "session.jsonl") + if err := os.WriteFile(path, []byte(basicSessionJSONL), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + ag := &CodexCLIAgent{} + pos, err := ag.GetTranscriptPosition(path) + if err != nil { + t.Fatalf("GetTranscriptPosition() error = %v", err) + } + if pos != 8 { + t.Errorf("GetTranscriptPosition() = %d, want 8", pos) + } +} + +func TestCodexCLIAgent_ExtractModifiedFilesFromOffset_FileOpenError(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + _, _, err := ag.ExtractModifiedFilesFromOffset("/nonexistent/session.jsonl", 0) + if err == nil { + t.Error("ExtractModifiedFilesFromOffset() should return error for nonexistent file") + } +} + +func TestCodexCLIAgent_ExtractModifiedFilesFromOffset_MalformedData(t *testing.T) { + t.Parallel() + + // Lines cover: event unmarshal error, envelope unmarshal error, file_change unmarshal error + data := `{"type":"thread.started","thread_id":"test"} +not valid json at all +{"type":"item.completed","item":"not an object"} +{"type":"item.completed","item":{"type":"file_change","changes":"not an array"}} +{"type":"item.completed","item":{"type":"file_change","changes":[{"path":"good.go","kind":"add"}]}} +{"type":"turn.completed"} +` + + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "malformed.jsonl") + if err := os.WriteFile(path, []byte(data), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + ag := &CodexCLIAgent{} + files, pos, err := ag.ExtractModifiedFilesFromOffset(path, 0) + if err != nil { + t.Fatalf("ExtractModifiedFilesFromOffset() error = %v", err) + } + if len(files) != 1 || files[0] != "good.go" { + t.Errorf("files = %v, want [good.go]", files) + } + if pos != 6 { + 
t.Errorf("position = %d, want 6", pos) + } +} + +func TestCodexCLIAgent_ChunkTranscript_LineTooLarge(t *testing.T) { + t.Parallel() + + ag := &CodexCLIAgent{} + // A line longer than maxSize triggers an error from ChunkJSONL + data := []byte(`{"type":"thread.started","thread_id":"test-chunk-error"}`) + _, err := ag.ChunkTranscript(data, 5) + if err == nil { + t.Error("ChunkTranscript() should return error when a line exceeds maxSize") + } +} + +// errorReader is a reader that returns an error after delivering some data. +type errorReader struct { + data []byte + pos int + errAt int + err error +} + +func (r *errorReader) Read(p []byte) (int, error) { + if r.pos >= r.errAt { + return 0, r.err + } + end := r.pos + len(p) + if end > len(r.data) { + end = len(r.data) + } + if end > r.errAt { + end = r.errAt + } + n := copy(p, r.data[r.pos:end]) + r.pos += n + if r.pos >= r.errAt { + return n, r.err + } + return n, nil +} + +func TestScanModifiedFiles_ScannerError(t *testing.T) { + t.Parallel() + + data := `{"type":"item.completed","item":{"type":"file_change","changes":[{"path":"a.go","kind":"add"}]}}` + "\n" + r := &errorReader{ + data: []byte(data), + errAt: len(data) / 2, // error partway through + err: errors.New("injected IO error"), + } + + _, _, err := scanModifiedFiles(r, 0) + if err == nil { + t.Error("scanModifiedFiles() should return error on reader failure") + } +} diff --git a/cmd/entire/cli/agent/codexcli/transcript.go b/cmd/entire/cli/agent/codexcli/transcript.go new file mode 100644 index 000000000..b5baba2d2 --- /dev/null +++ b/cmd/entire/cli/agent/codexcli/transcript.go @@ -0,0 +1,197 @@ +package codexcli + +import ( + "bufio" + "bytes" + "encoding/json" + "fmt" + "io" + "os" + + "github.com/entireio/cli/cmd/entire/cli/agent" +) + +// scannerBufferSize for reading large JSONL files (10MB). +const scannerBufferSize = 10 * 1024 * 1024 + +// ParsedSession holds the normalized data extracted from a Codex JSONL event stream. 
+type ParsedSession struct { + ThreadID string + Messages []string + Commands []CommandExecutionItem + FileChanges []FileChange + ModifiedFiles []string + TokenUsage *agent.TokenUsage + Errors []string +} + +// ParseEventStream parses raw Codex JSONL bytes into a ParsedSession. +// Unknown event types and malformed lines are silently skipped. +func ParseEventStream(data []byte) (*ParsedSession, error) { + return parseEvents(bufio.NewScanner(bytes.NewReader(data))) +} + +// ParseEventStreamFromFile parses a Codex JSONL file into a ParsedSession. +func ParseEventStreamFromFile(path string) (*ParsedSession, error) { + file, err := os.Open(path) //nolint:gosec // path comes from controlled transcript location + if err != nil { + return nil, fmt.Errorf("failed to open transcript file: %w", err) + } + defer file.Close() + + return parseEvents(bufio.NewScanner(file)) +} + +func parseEvents(scanner *bufio.Scanner) (*ParsedSession, error) { + scanner.Buffer(make([]byte, 0, scannerBufferSize), scannerBufferSize) + + session := &ParsedSession{ + TokenUsage: &agent.TokenUsage{}, + } + + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + + var event Event + if err := json.Unmarshal(line, &event); err != nil { + continue // skip malformed lines + } + + processEvent(session, &event) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("failed to scan event stream: %w", err) + } + + return session, nil +} + +func processEvent(s *ParsedSession, event *Event) { + switch event.Type { + case EventThreadStarted: + s.ThreadID = event.ThreadID + + case EventTurnCompleted: + if event.Usage != nil { + s.TokenUsage.InputTokens += event.Usage.InputTokens + s.TokenUsage.CacheReadTokens += event.Usage.CachedInputTokens + s.TokenUsage.OutputTokens += event.Usage.OutputTokens + s.TokenUsage.APICallCount++ + } + + case EventTurnFailed: + if event.Error != nil { + s.Errors = append(s.Errors, event.Error.Message) + } + + case EventError: + if 
event.Message != "" { + s.Errors = append(s.Errors, event.Message) + } + + case EventItemCompleted: + processCompletedItem(s, event.Item) + + case EventTurnStarted, EventItemStarted, EventItemUpdated: + // no action needed for these lifecycle events + } +} + +func processCompletedItem(s *ParsedSession, raw json.RawMessage) { + if len(raw) == 0 { + return + } + + var envelope ItemEnvelope + if err := json.Unmarshal(raw, &envelope); err != nil { + return + } + + switch envelope.Type { + case ItemAgentMessage: + var item AgentMessageItem + if err := json.Unmarshal(raw, &item); err == nil && item.Text != "" { + s.Messages = append(s.Messages, item.Text) + } + + case ItemCommandExecution: + var item CommandExecutionItem + if err := json.Unmarshal(raw, &item); err == nil { + s.Commands = append(s.Commands, item) + } + + case ItemFileChange: + var item FileChangeItem + if err := json.Unmarshal(raw, &item); err == nil { + s.FileChanges = append(s.FileChanges, item.Changes...) + for _, change := range item.Changes { + s.ModifiedFiles = appendUnique(s.ModifiedFiles, change.Path) + } + } + + case ItemReasoning, ItemMCPToolCall, ItemWebSearch, ItemTodoList, ItemError: + // recognized but not stored for checkpoint purposes + } +} + +// ExtractModifiedFiles returns deduplicated file paths from parsed session data. +func ExtractModifiedFiles(s *ParsedSession) []string { + return s.ModifiedFiles +} + +// ExtractLastMessage returns the last agent message, or empty string. +func ExtractLastMessage(s *ParsedSession) string { + if len(s.Messages) == 0 { + return "" + } + return s.Messages[len(s.Messages)-1] +} + +// GetTranscriptPosition returns the line count of a Codex JSONL event file. +// Returns 0 if the file does not exist or is empty. 
+func GetTranscriptPosition(path string) (int, error) { + if path == "" { + return 0, nil + } + + file, err := os.Open(path) //nolint:gosec // path comes from controlled transcript location + if err != nil { + if os.IsNotExist(err) { + return 0, nil + } + return 0, fmt.Errorf("failed to open transcript file: %w", err) + } + defer file.Close() + + return countLines(file) +} + +// countLines counts newline-terminated lines in a reader. +func countLines(r io.Reader) (int, error) { + reader := bufio.NewReader(r) + count := 0 + for { + _, err := reader.ReadBytes('\n') + if err != nil { + if err == io.EOF { + break + } + return 0, fmt.Errorf("failed to read transcript: %w", err) + } + count++ + } + return count, nil +} + +func appendUnique(slice []string, val string) []string { + for _, existing := range slice { + if existing == val { + return slice + } + } + return append(slice, val) +} diff --git a/cmd/entire/cli/agent/codexcli/transcript_test.go b/cmd/entire/cli/agent/codexcli/transcript_test.go new file mode 100644 index 000000000..53c7cb660 --- /dev/null +++ b/cmd/entire/cli/agent/codexcli/transcript_test.go @@ -0,0 +1,486 @@ +package codexcli + +import ( + "bufio" + "errors" + "os" + "path/filepath" + "testing" +) + +const testBasicThreadID = "0199a213-81c0-7800-8aa1-bbab2a035a53" + +const basicSessionJSONL = `{"type":"thread.started","thread_id":"0199a213-81c0-7800-8aa1-bbab2a035a53"} +{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_0","type":"reasoning","text":"**Scanning project structure**"}} +{"type":"item.started","item":{"id":"item_1","type":"command_execution","command":"bash -lc ls","aggregated_output":"","exit_code":null,"status":"in_progress"}} +{"type":"item.completed","item":{"id":"item_1","type":"command_execution","command":"bash -lc ls","aggregated_output":"docs\nsrc\nREADME.md\n","exit_code":0,"status":"completed"}} 
+{"type":"item.completed","item":{"id":"item_2","type":"file_change","changes":[{"path":"src/main.go","kind":"update"},{"path":"src/util.go","kind":"add"}],"status":"completed"}} +{"type":"item.completed","item":{"id":"item_3","type":"agent_message","text":"Done. I updated src/main.go and created src/util.go."}} +{"type":"turn.completed","usage":{"input_tokens":24763,"cached_input_tokens":24448,"output_tokens":122}} +` + +const multiTurnJSONL = `{"type":"thread.started","thread_id":"abc12345-def6-7890-abcd-ef1234567890"} +{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_0","type":"command_execution","command":"bash -lc cat README.md","aggregated_output":"# Project\n","exit_code":0,"status":"completed"}} +{"type":"item.completed","item":{"id":"item_1","type":"agent_message","text":"I see the project structure. Let me make the changes."}} +{"type":"turn.completed","usage":{"input_tokens":1000,"cached_input_tokens":500,"output_tokens":50}} +{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_2","type":"file_change","changes":[{"path":"README.md","kind":"update"}],"status":"completed"}} +{"type":"item.completed","item":{"id":"item_3","type":"file_change","changes":[{"path":"docs/guide.md","kind":"add"}],"status":"completed"}} +{"type":"item.completed","item":{"id":"item_4","type":"agent_message","text":"Updated README.md and added docs/guide.md."}} +{"type":"turn.completed","usage":{"input_tokens":2000,"cached_input_tokens":1800,"output_tokens":100}} +` + +const withErrorsJSONL = `{"type":"thread.started","thread_id":"err-session-001"} +{"type":"turn.started"} +{"type":"item.completed","item":{"id":"item_0","type":"command_execution","command":"bash -lc false","aggregated_output":"","exit_code":1,"status":"failed"}} +{"type":"item.completed","item":{"id":"item_1","type":"error","message":"command output truncated"}} +{"type":"turn.failed","error":{"message":"model response stream ended unexpectedly"}} 
+{"type":"error","message":"stream error: broken pipe"} +` + +func TestParseEventStream_BasicSession(t *testing.T) { + t.Parallel() + + session, err := ParseEventStream([]byte(basicSessionJSONL)) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != testBasicThreadID { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, testBasicThreadID) + } + + if len(session.Messages) != 1 { + t.Errorf("Messages count = %d, want 1", len(session.Messages)) + } else if session.Messages[0] != "Done. I updated src/main.go and created src/util.go." { + t.Errorf("Messages[0] = %q, unexpected", session.Messages[0]) + } + + if len(session.Commands) != 1 { + t.Errorf("Commands count = %d, want 1", len(session.Commands)) + } else { + if session.Commands[0].Command != "bash -lc ls" { + t.Errorf("Commands[0].Command = %q, want %q", session.Commands[0].Command, "bash -lc ls") + } + if session.Commands[0].ExitCode == nil || *session.Commands[0].ExitCode != 0 { + t.Errorf("Commands[0].ExitCode unexpected") + } + } + + if len(session.ModifiedFiles) != 2 { + t.Errorf("ModifiedFiles count = %d, want 2", len(session.ModifiedFiles)) + } + + wantFiles := map[string]bool{"src/main.go": true, "src/util.go": true} + for _, f := range session.ModifiedFiles { + if !wantFiles[f] { + t.Errorf("unexpected modified file: %q", f) + } + } + + if session.TokenUsage.InputTokens != 24763 { + t.Errorf("InputTokens = %d, want 24763", session.TokenUsage.InputTokens) + } + if session.TokenUsage.CacheReadTokens != 24448 { + t.Errorf("CacheReadTokens = %d, want 24448", session.TokenUsage.CacheReadTokens) + } + if session.TokenUsage.OutputTokens != 122 { + t.Errorf("OutputTokens = %d, want 122", session.TokenUsage.OutputTokens) + } + if session.TokenUsage.APICallCount != 1 { + t.Errorf("APICallCount = %d, want 1", session.TokenUsage.APICallCount) + } +} + +func TestParseEventStream_MultiTurn(t *testing.T) { + t.Parallel() + + session, err := 
ParseEventStream([]byte(multiTurnJSONL)) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != "abc12345-def6-7890-abcd-ef1234567890" { + t.Errorf("ThreadID = %q, unexpected", session.ThreadID) + } + + if len(session.Messages) != 2 { + t.Errorf("Messages count = %d, want 2", len(session.Messages)) + } + + if session.TokenUsage.InputTokens != 3000 { + t.Errorf("InputTokens = %d, want 3000", session.TokenUsage.InputTokens) + } + if session.TokenUsage.CacheReadTokens != 2300 { + t.Errorf("CacheReadTokens = %d, want 2300", session.TokenUsage.CacheReadTokens) + } + if session.TokenUsage.OutputTokens != 150 { + t.Errorf("OutputTokens = %d, want 150", session.TokenUsage.OutputTokens) + } + if session.TokenUsage.APICallCount != 2 { + t.Errorf("APICallCount = %d, want 2", session.TokenUsage.APICallCount) + } + + if len(session.ModifiedFiles) != 2 { + t.Errorf("ModifiedFiles count = %d, want 2", len(session.ModifiedFiles)) + } + + wantFiles := map[string]bool{"README.md": true, "docs/guide.md": true} + for _, f := range session.ModifiedFiles { + if !wantFiles[f] { + t.Errorf("unexpected modified file: %q", f) + } + } +} + +func TestParseEventStream_WithErrors(t *testing.T) { + t.Parallel() + + session, err := ParseEventStream([]byte(withErrorsJSONL)) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != "err-session-001" { + t.Errorf("ThreadID = %q, unexpected", session.ThreadID) + } + + if len(session.Errors) != 2 { + t.Errorf("Errors count = %d, want 2", len(session.Errors)) + } + + if len(session.Commands) != 1 { + t.Errorf("Commands count = %d, want 1", len(session.Commands)) + } else { + if session.Commands[0].ExitCode == nil || *session.Commands[0].ExitCode != 1 { + t.Errorf("Commands[0].ExitCode unexpected") + } + if session.Commands[0].Status != "failed" { + t.Errorf("Commands[0].Status = %q, want %q", session.Commands[0].Status, "failed") + } + } + + if 
session.TokenUsage.APICallCount != 0 { + t.Errorf("APICallCount = %d, want 0", session.TokenUsage.APICallCount) + } +} + +func TestParseEventStream_MalformedLines(t *testing.T) { + t.Parallel() + + data := []byte(`{"type":"thread.started","thread_id":"test-123"} +not valid json at all +{"type":"turn.started"} +{"broken json +{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"hello"}} +{"type":"turn.completed","usage":{"input_tokens":100,"cached_input_tokens":0,"output_tokens":10}} +`) + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != "test-123" { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, "test-123") + } + if len(session.Messages) != 1 { + t.Errorf("Messages count = %d, want 1", len(session.Messages)) + } + if session.TokenUsage.InputTokens != 100 { + t.Errorf("InputTokens = %d, want 100", session.TokenUsage.InputTokens) + } +} + +func TestParseEventStream_UnknownEvents(t *testing.T) { + t.Parallel() + + data := []byte(`{"type":"thread.started","thread_id":"test-456"} +{"type":"unknown.future.event","data":"something"} +{"type":"item.completed","item":{"id":"item_0","type":"future_item_type","data":"something"}} +{"type":"turn.completed","usage":{"input_tokens":50,"cached_input_tokens":0,"output_tokens":5}} +`) + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != "test-456" { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, "test-456") + } + if session.TokenUsage.InputTokens != 50 { + t.Errorf("InputTokens = %d, want 50", session.TokenUsage.InputTokens) + } +} + +func TestParseEventStream_EmptyInput(t *testing.T) { + t.Parallel() + + session, err := ParseEventStream([]byte{}) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if session.ThreadID != "" { + t.Errorf("ThreadID = %q, want empty", session.ThreadID) + } + if 
len(session.Messages) != 0 { + t.Errorf("Messages count = %d, want 0", len(session.Messages)) + } +} + +func TestParseEventStream_FileChangeDeduplicate(t *testing.T) { + t.Parallel() + + data := []byte(`{"type":"thread.started","thread_id":"dedup-test"} +{"type":"item.completed","item":{"id":"item_0","type":"file_change","changes":[{"path":"foo.go","kind":"update"}],"status":"completed"}} +{"type":"item.completed","item":{"id":"item_1","type":"file_change","changes":[{"path":"foo.go","kind":"update"},{"path":"bar.go","kind":"add"}],"status":"completed"}} +{"type":"turn.completed","usage":{"input_tokens":10,"cached_input_tokens":0,"output_tokens":5}} +`) + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + + if len(session.ModifiedFiles) != 2 { + t.Errorf("ModifiedFiles count = %d, want 2 (deduplicated)", len(session.ModifiedFiles)) + } + + if len(session.FileChanges) != 3 { + t.Errorf("FileChanges count = %d, want 3 (not deduplicated)", len(session.FileChanges)) + } +} + +func TestExtractLastMessage(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + messages []string + want string + }{ + {"empty", nil, ""}, + {"single", []string{"hello"}, "hello"}, + {"multiple", []string{"first", "second", "third"}, "third"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + s := &ParsedSession{Messages: tt.messages} + got := ExtractLastMessage(s) + if got != tt.want { + t.Errorf("ExtractLastMessage() = %q, want %q", got, tt.want) + } + }) + } +} + +func TestParseEventStreamFromFile(t *testing.T) { + t.Parallel() + + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "session.jsonl") + if err := os.WriteFile(path, []byte(basicSessionJSONL), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + session, err := ParseEventStreamFromFile(path) + if err != nil { + t.Fatalf("ParseEventStreamFromFile() error = %v", err) + } + + if 
session.ThreadID != testBasicThreadID { + t.Errorf("ThreadID = %q, unexpected", session.ThreadID) + } +} + +func TestGetTranscriptPosition(t *testing.T) { + t.Parallel() + + t.Run("existing file", func(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "session.jsonl") + if err := os.WriteFile(path, []byte(basicSessionJSONL), 0o600); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + + count, err := GetTranscriptPosition(path) + if err != nil { + t.Fatalf("GetTranscriptPosition() error = %v", err) + } + if count != 8 { + t.Errorf("GetTranscriptPosition() = %d, want 8", count) + } + }) + + t.Run("nonexistent file", func(t *testing.T) { + t.Parallel() + count, err := GetTranscriptPosition("nonexistent.jsonl") + if err != nil { + t.Fatalf("GetTranscriptPosition() error = %v", err) + } + if count != 0 { + t.Errorf("GetTranscriptPosition() = %d, want 0", count) + } + }) + + t.Run("empty path", func(t *testing.T) { + t.Parallel() + count, err := GetTranscriptPosition("") + if err != nil { + t.Fatalf("GetTranscriptPosition() error = %v", err) + } + if count != 0 { + t.Errorf("GetTranscriptPosition() = %d, want 0", count) + } + }) + + t.Run("permission error", func(t *testing.T) { + t.Parallel() + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "noperm.jsonl") + if err := os.WriteFile(path, []byte("data\n"), 0o000); err != nil { + t.Fatalf("failed to write temp file: %v", err) + } + count, err := GetTranscriptPosition(path) + if err == nil { + t.Skip("file permissions not enforced (likely running as root)") + } + if count != 0 { + t.Errorf("GetTranscriptPosition() = %d, want 0 on error", count) + } + }) +} + +func TestParseEventStreamFromFile_MissingFile(t *testing.T) { + t.Parallel() + + _, err := ParseEventStreamFromFile("/nonexistent/path/session.jsonl") + if err == nil { + t.Error("ParseEventStreamFromFile() should return error for nonexistent file") + } +} + +func TestParseEventStream_EmptyLines(t *testing.T) 
{ + t.Parallel() + + data := []byte(`{"type":"thread.started","thread_id":"empty-lines-test"}` + "\n" + + "\n" + + `{"type":"turn.completed","usage":{"input_tokens":10,"cached_input_tokens":0,"output_tokens":5}}` + "\n" + + "\n") + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + if session.ThreadID != "empty-lines-test" { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, "empty-lines-test") + } + if session.TokenUsage.InputTokens != 10 { + t.Errorf("InputTokens = %d, want 10", session.TokenUsage.InputTokens) + } +} + +func TestParseEventStream_EmptyItemField(t *testing.T) { + t.Parallel() + + // item.completed with no item field → nil RawMessage → len(raw) == 0 + data := []byte(`{"type":"thread.started","thread_id":"empty-item-test"} +{"type":"item.completed"} +{"type":"turn.completed","usage":{"input_tokens":10,"cached_input_tokens":0,"output_tokens":5}} +`) + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + if session.ThreadID != "empty-item-test" { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, "empty-item-test") + } + if len(session.Messages) != 0 { + t.Errorf("Messages count = %d, want 0", len(session.Messages)) + } +} + +func TestParseEventStream_InvalidItemEnvelope(t *testing.T) { + t.Parallel() + + // item field is a JSON number, not an object — envelope unmarshal fails + data := []byte(`{"type":"thread.started","thread_id":"bad-envelope-test"} +{"type":"item.completed","item":42} +{"type":"item.completed","item":"not an object"} +{"type":"turn.completed","usage":{"input_tokens":10,"cached_input_tokens":0,"output_tokens":5}} +`) + + session, err := ParseEventStream(data) + if err != nil { + t.Fatalf("ParseEventStream() error = %v", err) + } + if session.ThreadID != "bad-envelope-test" { + t.Errorf("ThreadID = %q, want %q", session.ThreadID, "bad-envelope-test") + } + if len(session.Messages) != 0 { + 
t.Errorf("Messages count = %d, want 0", len(session.Messages)) + } +} + +func TestExtractModifiedFiles(t *testing.T) { + t.Parallel() + + t.Run("with files", func(t *testing.T) { + t.Parallel() + s := &ParsedSession{ModifiedFiles: []string{"foo.go", "bar.go"}} + files := ExtractModifiedFiles(s) + if len(files) != 2 { + t.Errorf("ExtractModifiedFiles() count = %d, want 2", len(files)) + } + }) + + t.Run("empty", func(t *testing.T) { + t.Parallel() + s := &ParsedSession{} + files := ExtractModifiedFiles(s) + if len(files) != 0 { + t.Errorf("ExtractModifiedFiles() count = %d, want 0", len(files)) + } + }) +} + +func TestParseEvents_ScannerError(t *testing.T) { + t.Parallel() + + data := `{"type":"thread.started","thread_id":"test"}` + "\n" + r := &errorReader{ + data: []byte(data), + errAt: len(data) / 2, + err: errors.New("injected IO error"), + } + scanner := bufio.NewScanner(r) + + _, err := parseEvents(scanner) + if err == nil { + t.Error("parseEvents() should return error on scanner failure") + } +} + +func TestCountLines_ReadError(t *testing.T) { + t.Parallel() + + data := "line1\nline2\n" + r := &errorReader{ + data: []byte(data), + errAt: len(data) / 2, + err: errors.New("injected IO error"), + } + + _, err := countLines(r) + if err == nil { + t.Error("countLines() should return error on reader failure") + } +} diff --git a/cmd/entire/cli/agent/codexcli/types.go b/cmd/entire/cli/agent/codexcli/types.go new file mode 100644 index 000000000..f72c630bf --- /dev/null +++ b/cmd/entire/cli/agent/codexcli/types.go @@ -0,0 +1,132 @@ +package codexcli + +import "encoding/json" + +// Top-level JSONL event types emitted by codex exec --json. 
// Session-level event names. Like every Event* constant below, these are values
// of the "type" field on a line emitted by `codex exec --json`.
const (
	EventThreadStarted = "thread.started"
	EventError         = "error"
)

// Turn lifecycle event names.
const (
	EventTurnStarted   = "turn.started"
	EventTurnCompleted = "turn.completed"
	EventTurnFailed    = "turn.failed"
)

// Item lifecycle event names.
const (
	EventItemStarted   = "item.started"
	EventItemUpdated   = "item.updated"
	EventItemCompleted = "item.completed"
)

// Item kinds: values of the "type" field inside an item payload.
const (
	ItemAgentMessage     = "agent_message"
	ItemReasoning        = "reasoning"
	ItemCommandExecution = "command_execution"
	ItemFileChange       = "file_change"
	ItemMCPToolCall      = "mcp_tool_call"
	ItemWebSearch        = "web_search"
	ItemTodoList         = "todo_list"
	ItemError            = "error"
)

// Kinds of modification reported for a single path in a file_change item.
const (
	FileChangeAdd    = "add"
	FileChangeUpdate = "update"
	FileChangeDelete = "delete"
)

type (
	// Event is the outer envelope shared by every line of the Codex JSONL
	// stream. Only Type is always serialized; the remaining fields are
	// optional and omitted from the encoding when empty.
	Event struct {
		Type     string          `json:"type"`
		ThreadID string          `json:"thread_id,omitempty"`
		Usage    *TurnUsage      `json:"usage,omitempty"`
		Error    *ErrorDetail    `json:"error,omitempty"`
		Message  string          `json:"message,omitempty"`
		Item     json.RawMessage `json:"item,omitempty"` // left undecoded; see ItemEnvelope
	}

	// TurnUsage holds the token counters attached to a turn.completed event.
	TurnUsage struct {
		InputTokens       int `json:"input_tokens"`
		CachedInputTokens int `json:"cached_input_tokens"`
		OutputTokens      int `json:"output_tokens"`
	}

	// ErrorDetail is the nested error object carried by turn.failed or error
	// events.
	ErrorDetail struct {
		Message string `json:"message"`
	}

	// ItemEnvelope reads just the fields common to item payloads, which is
	// enough to learn an item's type before decoding it into a concrete struct.
	ItemEnvelope struct {
		ID     string `json:"id"`
		Type   string `json:"type"`
		Status string `json:"status,omitempty"`
	}

	// AgentMessageItem is the payload of an item whose type is "agent_message".
	AgentMessageItem struct {
		ID   string `json:"id"`
		Type string `json:"type"`
		Text string `json:"text"`
	}
)
type (
	// ReasoningItem is the payload of an item whose type is "reasoning".
	ReasoningItem struct {
		ID   string `json:"id"`
		Type string `json:"type"`
		Text string `json:"text"`
	}

	// CommandExecutionItem is the payload of an item whose type is
	// "command_execution".
	CommandExecutionItem struct {
		ID               string `json:"id"`
		Type             string `json:"type"`
		Command          string `json:"command"`
		AggregatedOutput string `json:"aggregated_output"`
		ExitCode         *int   `json:"exit_code"` // pointer: stays nil when the field is absent or null
		Status           string `json:"status"`
	}

	// FileChangeItem is the payload of an item whose type is "file_change".
	// One item may list several changed paths.
	FileChangeItem struct {
		ID      string       `json:"id"`
		Type    string       `json:"type"`
		Changes []FileChange `json:"changes"`
		Status  string       `json:"status"`
	}

	// FileChange is one entry of FileChangeItem.Changes: a path and the kind of
	// modification applied to it.
	FileChange struct {
		Path string `json:"path"`
		Kind string `json:"kind"`
	}

	// MCPToolCallItem is the payload of an item whose type is "mcp_tool_call".
	// Arguments and Result are kept as raw JSON rather than decoded here.
	MCPToolCallItem struct {
		ID        string          `json:"id"`
		Type      string          `json:"type"`
		Server    string          `json:"server"`
		Tool      string          `json:"tool"`
		Arguments json.RawMessage `json:"arguments"`
		Result    json.RawMessage `json:"result,omitempty"`
		Error     *string         `json:"error,omitempty"`
		Status    string          `json:"status"`
	}

	// TodoListItem is the payload of an item whose type is "todo_list".
	TodoListItem struct {
		ID    string     `json:"id"`
		Type  string     `json:"type"`
		Items []TodoItem `json:"items"`
	}

	// TodoItem is one entry of TodoListItem.Items.
	TodoItem struct {
		Text      string `json:"text"`
		Completed bool   `json:"completed"`
	}
)

// HookNameSessionStart is the hook name for codex session start (used internally).
const HookNameSessionStart = "session-start"
+const HookNameSessionEnd = "session-end" diff --git a/cmd/entire/cli/agent/registry.go b/cmd/entire/cli/agent/registry.go index 5f3df9e02..552811bfb 100644 --- a/cmd/entire/cli/agent/registry.go +++ b/cmd/entire/cli/agent/registry.go @@ -80,12 +80,14 @@ type AgentType string const ( AgentNameClaudeCode AgentName = "claude-code" AgentNameGemini AgentName = "gemini" + AgentNameCodex AgentName = "codex" ) // Agent type constants (type identifiers stored in metadata/trailers) const ( AgentTypeClaudeCode AgentType = "Claude Code" AgentTypeGemini AgentType = "Gemini CLI" + AgentTypeCodex AgentType = "Codex CLI" AgentTypeUnknown AgentType = "Agent" // Fallback for backwards compatibility ) diff --git a/cmd/entire/cli/codex.go b/cmd/entire/cli/codex.go new file mode 100644 index 000000000..047e9df3a --- /dev/null +++ b/cmd/entire/cli/codex.go @@ -0,0 +1,325 @@ +package cli + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/agent/codexcli" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/strategy" + "github.com/spf13/cobra" +) + +func newCodexCmd() *cobra.Command { + cmd := &cobra.Command{ + Use: "codex", + Short: "Codex CLI integration", + Long: "Run OpenAI Codex CLI commands with Entire checkpoint capture.", + RunE: func(cmd *cobra.Command, _ []string) error { + return cmd.Help() + }, + } + + cmd.AddCommand(newCodexExecCmd()) + return cmd +} + +func newCodexExecCmd() *cobra.Command { + var ( + model string + sandbox string + ) + + cmd := &cobra.Command{ + Use: "exec [flags] -- ", + Short: "Run codex exec and capture a checkpoint", + Long: `Run a non-interactive Codex session and capture the result as an Entire checkpoint. + +This wraps 'codex exec --json' to capture the JSONL event stream, then +stores the session data using the configured Entire strategy. 
+ +The prompt can be passed after '--' or piped via stdin using '-'. + +Examples: + entire codex exec -- "fix the failing tests" + entire codex exec --model o3 -- "refactor the auth module" + echo "add error handling" | entire codex exec -- -`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runCodexExec(cmd, args, model, sandbox) + }, + } + + cmd.Flags().StringVarP(&model, "model", "m", "", "Override Codex model") + cmd.Flags().StringVarP(&sandbox, "sandbox", "s", "", "Sandbox policy (read-only, workspace-write, danger-full-access)") + + return cmd +} + +func runCodexExec(cmd *cobra.Command, args []string, model, sandbox string) error { + // Verify Entire is enabled + enabled, err := IsEnabled() + if err == nil && !enabled { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Entire is not enabled. Run 'entire enable' first.") + return NewSilentError(errors.New("entire not enabled")) + } + + // Verify codex binary exists + codexPath, err := exec.LookPath("codex") + if err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Codex CLI not found in PATH. Install it from https://github.com/openai/codex") + return NewSilentError(errors.New("codex not found")) + } + + // Verify we're in a git repository + if _, err := paths.RepoRoot(); err != nil { + cmd.SilenceUsage = true + fmt.Fprintln(cmd.ErrOrStderr(), "Not a git repository. 
Please run from within a git repository.") + return NewSilentError(errors.New("not a git repository")) + } + + prompt := strings.Join(args, " ") + + // Capture pre-execution state + preState, captureErr := capturePreExecState() + if captureErr != nil { + fmt.Fprintf(cmd.ErrOrStderr(), "Warning: failed to capture pre-exec state: %v\n", captureErr) + } + + // Build codex command args + codexArgs := []string{"exec", "--json"} + if model != "" { + codexArgs = append(codexArgs, "--model", model) + } + if sandbox != "" { + codexArgs = append(codexArgs, "--sandbox", sandbox) + } + codexArgs = append(codexArgs, prompt) + + // Create temp file for capturing JSONL output + tmpFile, err := os.CreateTemp("", "entire-codex-*.jsonl") + if err != nil { + return fmt.Errorf("failed to create temp file: %w", err) + } + tmpPath := tmpFile.Name() + defer os.Remove(tmpPath) + + // Run codex exec, tee stdout to temp file and stderr to user + //nolint:gosec // codexPath is resolved from LookPath + codexCmd := exec.CommandContext(cmd.Context(), codexPath, codexArgs...) 
+ codexCmd.Stdin = cmd.InOrStdin() + codexCmd.Stderr = cmd.ErrOrStderr() + codexCmd.Stdout = tmpFile + + fmt.Fprintf(cmd.ErrOrStderr(), "Running: codex %s\n", strings.Join(codexArgs, " ")) + + codexErr := codexCmd.Run() + if err := tmpFile.Close(); err != nil { + return fmt.Errorf("failed to close temp file: %w", err) + } + + // Read captured JSONL + data, readErr := os.ReadFile(tmpPath) //nolint:gosec // tmpPath is from CreateTemp + if readErr != nil { + return fmt.Errorf("failed to read captured output: %w", readErr) + } + + if len(data) == 0 { + fmt.Fprintln(cmd.ErrOrStderr(), "No output captured from Codex.") + if codexErr != nil { + return codexErr //nolint:wrapcheck // propagate codex exit code + } + return nil + } + + // Parse the event stream + session, parseErr := codexcli.ParseEventStream(data) + if parseErr != nil { + fmt.Fprintf(cmd.ErrOrStderr(), "Warning: failed to parse Codex output: %v\n", parseErr) + if codexErr != nil { + return codexErr //nolint:wrapcheck // propagate codex exit code + } + return nil + } + + // Create checkpoint from parsed session + if err := createCodexCheckpoint(cmd, session, data, prompt, preState); err != nil { + return fmt.Errorf("failed to create checkpoint: %w", err) + } + + // Propagate Codex exit code + if codexErr != nil { + return codexErr //nolint:wrapcheck // propagate codex exit code + } + + return nil +} + +// preExecState stores the state captured before running Codex. 
+type preExecState struct { + untrackedFiles []string +} + +func capturePreExecState() (*preExecState, error) { + files, err := getUntrackedFilesForState() + if err != nil { + return nil, err + } + return &preExecState{untrackedFiles: files}, nil +} + +func createCodexCheckpoint( + cmd *cobra.Command, + session *codexcli.ParsedSession, + rawData []byte, + prompt string, + preState *preExecState, +) error { + stderr := cmd.ErrOrStderr() + + // Determine session ID from Codex thread ID + sessionID := session.ThreadID + if sessionID == "" { + sessionID = unknownSessionID + } + + // Create session metadata directory + sessionDir := paths.SessionMetadataDirFromSessionID(sessionID) + sessionDirAbs, err := paths.AbsPath(sessionDir) + if err != nil { + sessionDirAbs = sessionDir + } + if err := os.MkdirAll(sessionDirAbs, 0o750); err != nil { + return fmt.Errorf("failed to create session directory: %w", err) + } + + // Write raw transcript + transcriptFile := filepath.Join(sessionDirAbs, paths.TranscriptFileName) + if err := os.WriteFile(transcriptFile, rawData, 0o600); err != nil { + return fmt.Errorf("failed to write transcript: %w", err) + } + fmt.Fprintf(stderr, "Saved transcript to: %s/%s\n", sessionDir, paths.TranscriptFileName) + + // Write prompts file + promptFile := filepath.Join(sessionDirAbs, paths.PromptFileName) + if err := os.WriteFile(promptFile, []byte(prompt), 0o600); err != nil { + return fmt.Errorf("failed to write prompt file: %w", err) + } + + // Write summary + summaryFile := filepath.Join(sessionDirAbs, paths.SummaryFileName) + summary := codexcli.ExtractLastMessage(session) + if err := os.WriteFile(summaryFile, []byte(summary), 0o600); err != nil { + return fmt.Errorf("failed to write summary file: %w", err) + } + + // Get modified files from the event stream + modifiedFiles := session.ModifiedFiles + + // Compute new and deleted files from git status + var previouslyUntracked []string + if preState != nil { + previouslyUntracked = 
preState.untrackedFiles + } + changes, err := DetectFileChanges(previouslyUntracked) + if err != nil { + fmt.Fprintf(stderr, "Warning: failed to detect file changes: %v\n", err) + } + + // Get repo root for path normalization + repoRoot, err := paths.RepoRoot() + if err != nil { + return fmt.Errorf("failed to get repo root: %w", err) + } + + relModifiedFiles := FilterAndNormalizePaths(modifiedFiles, repoRoot) + var relNewFiles, relDeletedFiles []string + if changes != nil { + relNewFiles = FilterAndNormalizePaths(changes.New, repoRoot) + relDeletedFiles = FilterAndNormalizePaths(changes.Deleted, repoRoot) + } + + totalChanges := len(relModifiedFiles) + len(relNewFiles) + len(relDeletedFiles) + if totalChanges == 0 { + fmt.Fprintln(stderr, "No files were modified during this session.") + return nil + } + + fmt.Fprintf(stderr, "Files modified (%d):\n", len(relModifiedFiles)) + for _, file := range relModifiedFiles { + fmt.Fprintf(stderr, " - %s\n", file) + } + if len(relNewFiles) > 0 { + fmt.Fprintf(stderr, "New files (%d):\n", len(relNewFiles)) + for _, file := range relNewFiles { + fmt.Fprintf(stderr, " + %s\n", file) + } + } + if len(relDeletedFiles) > 0 { + fmt.Fprintf(stderr, "Deleted files (%d):\n", len(relDeletedFiles)) + for _, file := range relDeletedFiles { + fmt.Fprintf(stderr, " - %s\n", file) + } + } + + // Generate commit message + commitMessage := generateCodexCommitMessage(prompt) + fmt.Fprintf(stderr, "Commit message: %s\n", commitMessage) + + // Get git author + author, err := GetGitAuthor() + if err != nil { + return fmt.Errorf("failed to get git author: %w", err) + } + + // Get strategy and save + strat := GetStrategy() + if err := strat.EnsureSetup(); err != nil { + fmt.Fprintf(stderr, "Warning: failed to ensure strategy setup: %v\n", err) + } + + ctx := strategy.SaveContext{ + SessionID: sessionID, + ModifiedFiles: relModifiedFiles, + NewFiles: relNewFiles, + DeletedFiles: relDeletedFiles, + MetadataDir: sessionDir, + MetadataDirAbs: 
sessionDirAbs, + CommitMessage: commitMessage, + TranscriptPath: transcriptFile, + AuthorName: author.Name, + AuthorEmail: author.Email, + AgentType: agent.AgentTypeCodex, + TokenUsage: session.TokenUsage, + } + + if err := strat.SaveChanges(ctx); err != nil { + return fmt.Errorf("failed to save changes: %w", err) + } + + fmt.Fprintln(stderr, "Checkpoint saved.") + + // Print Codex's summary response so the user sees what it did + if summary != "" { + fmt.Fprintf(cmd.OutOrStdout(), "\n%s\n", summary) + } + + return nil +} + +func generateCodexCommitMessage(prompt string) string { + if prompt != "" { + cleaned := cleanPromptForCommit(prompt) + if cleaned != "" { + return cleaned + } + } + return "Codex CLI session updates" +} diff --git a/cmd/entire/cli/config.go b/cmd/entire/cli/config.go index 48246c5be..d898dfcde 100644 --- a/cmd/entire/cli/config.go +++ b/cmd/entire/cli/config.go @@ -11,8 +11,9 @@ import ( "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/entireio/cli/cmd/entire/cli/strategy" - // Import claudecode to register the agent + // Import agent packages to register them _ "github.com/entireio/cli/cmd/entire/cli/agent/claudecode" + _ "github.com/entireio/cli/cmd/entire/cli/agent/codexcli" ) // Package-level aliases to avoid shadowing the settings package with local variables named "settings". diff --git a/cmd/entire/cli/root.go b/cmd/entire/cli/root.go index 5fedf6ad4..5227e3911 100644 --- a/cmd/entire/cli/root.go +++ b/cmd/entire/cli/root.go @@ -83,6 +83,7 @@ func NewRootCmd() *cobra.Command { cmd.AddCommand(newExplainCmd()) cmd.AddCommand(newDebugCmd()) cmd.AddCommand(newDoctorCmd()) + cmd.AddCommand(newCodexCmd()) cmd.AddCommand(newSendAnalyticsCmd()) cmd.AddCommand(newCurlBashPostInstallCmd())