Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 21 additions & 15 deletions clients/localai_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,17 @@ func (m *localAICompletionMessage) UnmarshalJSON(data []byte) error {

// CreateChatCompletion sends the chat completion request and parses the response,
// including LocalAI's optional "reasoning" field, into LLMReply.ReasoningContent.
func (llm *LocalAIClient) CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (cogito.LLMReply, error) {
func (llm *LocalAIClient) CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (cogito.LLMReply, cogito.LLMUsage, error) {
request.Model = llm.model
body, err := json.Marshal(request)
if err != nil {
return cogito.LLMReply{}, fmt.Errorf("localai: marshal request: %w", err)
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: marshal request: %w", err)
}

url := llm.baseURL + "/chat/completions"
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
if err != nil {
return cogito.LLMReply{}, fmt.Errorf("localai: new request: %w", err)
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: new request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
req.Header.Set("Accept", "application/json")
Expand All @@ -101,21 +101,21 @@ func (llm *LocalAIClient) CreateChatCompletion(ctx context.Context, request open

resp, err := llm.client.Do(req)
if err != nil {
return cogito.LLMReply{}, fmt.Errorf("localai: request: %w", err)
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: request: %w", err)
}
defer resp.Body.Close()

respBody, err := io.ReadAll(resp.Body)
if err != nil {
return cogito.LLMReply{}, fmt.Errorf("localai: read response: %w", err)
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: read response: %w", err)
}

if resp.StatusCode != http.StatusOK {
var errRes openai.ErrorResponse
if json.Unmarshal(respBody, &errRes) == nil && errRes.Error != nil {
return cogito.LLMReply{}, errRes.Error
return cogito.LLMReply{}, cogito.LLMUsage{}, errRes.Error
}
return cogito.LLMReply{}, &openai.RequestError{
return cogito.LLMReply{}, cogito.LLMUsage{}, &openai.RequestError{
HTTPStatus: resp.Status,
HTTPStatusCode: resp.StatusCode,
Err: fmt.Errorf("localai: %s", string(respBody)),
Expand All @@ -125,11 +125,11 @@ func (llm *LocalAIClient) CreateChatCompletion(ctx context.Context, request open

var localResp localAIChatCompletionResponse
if err := json.Unmarshal(respBody, &localResp); err != nil {
return cogito.LLMReply{}, fmt.Errorf("localai: unmarshal response: %w", err)
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: unmarshal response: %w", err)
}

if len(localResp.Choices) == 0 {
return cogito.LLMReply{}, fmt.Errorf("localai: no choices in response")
return cogito.LLMReply{}, cogito.LLMUsage{}, fmt.Errorf("localai: no choices in response")
}

choice := localResp.Choices[0]
Expand Down Expand Up @@ -157,30 +157,36 @@ func (llm *LocalAIClient) CreateChatCompletion(ctx context.Context, request open
// Ensure ReasoningContent is set for downstream (e.g. tools.go).
response.Choices[0].Message.ReasoningContent = reasoning

usage := cogito.LLMUsage{
PromptTokens: localResp.Usage.PromptTokens,
CompletionTokens: localResp.Usage.CompletionTokens,
TotalTokens: localResp.Usage.TotalTokens,
}

return cogito.LLMReply{
ChatCompletionResponse: response,
ReasoningContent: reasoning,
}, nil
}, usage, nil
}

// Ask prompts the LLM with the provided messages and returns a Fragment
// containing the response. Uses CreateChatCompletion so reasoning is preserved.
func (llm *LocalAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fragment, error) {
func (llm *LocalAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fragment, cogito.LLMUsage, error) {
messages := f.GetMessages()
request := openai.ChatCompletionRequest{
Model: llm.model,
Messages: messages,
}
reply, err := llm.CreateChatCompletion(ctx, request)
reply, _, err := llm.CreateChatCompletion(ctx, request)
if err != nil {
return cogito.Fragment{}, err
return cogito.Fragment{}, cogito.LLMUsage{}, err
}
if len(reply.ChatCompletionResponse.Choices) == 0 {
return cogito.Fragment{}, fmt.Errorf("localai: no choices in response")
return cogito.Fragment{}, cogito.LLMUsage{}, fmt.Errorf("localai: no choices in response")
}
return cogito.Fragment{
Messages: append(f.Messages, reply.ChatCompletionResponse.Choices[0].Message),
ParentFragment: &f,
Status: &cogito.Status{},
}, nil
}, cogito.LLMUsage{}, nil
}
30 changes: 23 additions & 7 deletions clients/openai_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func NewOpenAILLM(model, apiKey, baseURL string) *OpenAIClient {
// and returns a Fragment containing the response.
// The Fragment.GetMessages() method automatically handles force-text-reply
// when tool calls are present in the conversation history.
func (llm *OpenAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fragment, error) {
func (llm *OpenAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fragment, cogito.LLMUsage, error) {
// Use Fragment.GetMessages() which automatically adds force-text-reply
// system message when tool calls are detected in the conversation
messages := f.GetMessages()
Expand All @@ -40,27 +40,43 @@ func (llm *OpenAIClient) Ask(ctx context.Context, f cogito.Fragment) (cogito.Fra
},
)

if err == nil && len(resp.Choices) > 0 {
if err != nil {
return cogito.Fragment{}, cogito.LLMUsage{}, err
}

if len(resp.Choices) > 0 {
usage := cogito.LLMUsage{
PromptTokens: resp.Usage.PromptTokens,
CompletionTokens: resp.Usage.CompletionTokens,
TotalTokens: resp.Usage.TotalTokens,
}
return cogito.Fragment{
Messages: append(f.Messages, resp.Choices[0].Message),
ParentFragment: &f,
Status: &cogito.Status{},
}, nil
}, usage, nil
}

return cogito.Fragment{}, err
return cogito.Fragment{}, cogito.LLMUsage{}, nil
}

func (llm *OpenAIClient) CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (cogito.LLMReply, error) {
func (llm *OpenAIClient) CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (cogito.LLMReply, cogito.LLMUsage, error) {
request.Model = llm.model
response, err := llm.client.CreateChatCompletion(ctx, request)
if err != nil {
return cogito.LLMReply{}, err
return cogito.LLMReply{}, cogito.LLMUsage{}, err
}

usage := cogito.LLMUsage{
PromptTokens: response.Usage.PromptTokens,
CompletionTokens: response.Usage.CompletionTokens,
TotalTokens: response.Usage.TotalTokens,
}

return cogito.LLMReply{
ChatCompletionResponse: response,
ReasoningContent: response.Choices[0].Message.ReasoningContent,
}, nil
}, usage, nil
}

// NewOpenAIService creates a new OpenAI service instance
Expand Down
2 changes: 1 addition & 1 deletion extractors.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func ExtractKnowledgeGaps(llm LLM, f Fragment, opts ...Option) ([]string, error)
xlog.Debug("Analyzing knowledge gaps", "prompt", prompt)
newFragment := NewEmptyFragment().AddMessage("system", prompt)

f, err = llm.Ask(o.context, newFragment)
f, _, err = llm.Ask(o.context, newFragment)
if err != nil {
return nil, err
}
Expand Down
5 changes: 3 additions & 2 deletions fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type InjectedMessage struct {
}

type Status struct {
LastUsage LLMUsage // Track token usage from the last LLM call
Iterations int
ToolsCalled Tools
ToolResults []ToolStatus
Expand Down Expand Up @@ -210,7 +211,7 @@ func (r Fragment) ExtractStructure(ctx context.Context, llm LLM, s structures.St
},
}

resp, err := llm.CreateChatCompletion(ctx, decision)
resp, _, err := llm.CreateChatCompletion(ctx, decision)
if err != nil {
return err
}
Expand Down Expand Up @@ -271,7 +272,7 @@ func (f Fragment) SelectTool(ctx context.Context, llm LLM, availableTools Tools,
}
}

resp, err := llm.CreateChatCompletion(ctx, decision)
resp, _, err := llm.CreateChatCompletion(ctx, decision)
if err != nil {
return Fragment{}, nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions fragment_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ var _ = Describe("Result test", Label("e2e"), func() {

fragment := NewEmptyFragment().AddMessage("user", "Write a short poem about the sea in less than 20 words.")

result, err := defaultLLM.Ask(context.TODO(), fragment)
result, _, err := defaultLLM.Ask(context.TODO(), fragment)

Expect(err).ToNot(HaveOccurred())

Expand Down Expand Up @@ -156,7 +156,7 @@ var _ = Describe("Result test", Label("e2e"), func() {
Content: "What's the weather today in San Francisco?",
})

newFragment, result, err := fragment.SelectTool(context.TODO(), *defaultLLM, Tools{
newFragment, result, err := fragment.SelectTool(context.TODO(), defaultLLM, Tools{
NewToolDefinition(
(&GetWeatherTool{}),
WeatherArgs{},
Expand Down
4 changes: 2 additions & 2 deletions goal.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func ExtractGoal(llm LLM, f Fragment, opts ...Option) (*structures.Goal, error)

goalConv := NewEmptyFragment().AddMessage("user", prompt)

reasoningGoal, err := llm.Ask(o.context, goalConv)
reasoningGoal, _, err := llm.Ask(o.context, goalConv)
if err != nil {
return nil, fmt.Errorf("failed to ask LLM for goal identification: %w", err)
}
Expand Down Expand Up @@ -91,7 +91,7 @@ func IsGoalAchieved(llm LLM, f Fragment, goal *structures.Goal, opts ...Option)
}
goalAchievedConv := NewEmptyFragment().AddMessage("user", prompt, multimedias...)

reasoningGoal, err := llm.Ask(o.context, goalAchievedConv)
reasoningGoal, _, err := llm.Ask(o.context, goalAchievedConv)
if err != nil {
return nil, fmt.Errorf("failed to ask LLM for goal identification: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion guidelines.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func GetRelevantGuidelines(llm LLM, guidelines Guidelines, fragment Fragment, op

guidelineConv := NewEmptyFragment().AddMessage("user", guidelinePrompt)

guidelineResult, err := llm.Ask(o.context, guidelineConv)
guidelineResult, _, err := llm.Ask(o.context, guidelineConv)
if err != nil {
return Guidelines{}, fmt.Errorf("failed to ask LLM for guidelines: %w", err)
}
Expand Down
11 changes: 9 additions & 2 deletions llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@ import (
"github.com/sashabaranov/go-openai"
)

// LLMUsage represents token usage information from an LLM response
type LLMUsage struct {
PromptTokens int
CompletionTokens int
TotalTokens int
}

type LLM interface {
Ask(ctx context.Context, f Fragment) (Fragment, error)
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (LLMReply, error)
Ask(ctx context.Context, f Fragment) (Fragment, LLMUsage, error)
CreateChatCompletion(ctx context.Context, request openai.ChatCompletionRequest) (LLMReply, LLMUsage, error)
}

type LLMReply struct {
Expand Down
46 changes: 35 additions & 11 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,23 +63,29 @@ type Options struct {
todos *structures.TODOList

messagesManipulator func([]openai.ChatCompletionMessage) []openai.ChatCompletionMessage

// Compaction options - automatic conversation compaction based on token count
compactionThreshold int // Token count threshold that triggers compaction (0 = disabled)
compactionKeepMessages int // Number of recent messages to keep after compaction
}

type Option func(*Options)

func defaultOptions() *Options {
return &Options{
maxIterations: 1,
maxAttempts: 1,
maxRetries: 5,
loopDetectionSteps: 0,
forceReasoning: false,
maxAdjustmentAttempts: 5,
sinkStateTool: &defaultSinkStateTool{},
sinkState: true,
context: context.Background(),
statusCallback: func(s string) {},
reasoningCallback: func(s string) {},
maxIterations: 1,
maxAttempts: 1,
maxRetries: 5,
loopDetectionSteps: 0,
forceReasoning: false,
maxAdjustmentAttempts: 5,
sinkStateTool: &defaultSinkStateTool{},
sinkState: true,
context: context.Background(),
statusCallback: func(s string) {},
reasoningCallback: func(s string) {},
compactionThreshold: 0, // Disabled by default
compactionKeepMessages: 10, // Keep 10 recent messages by default
}
}

Expand Down Expand Up @@ -367,6 +373,24 @@ func WithMessageInjectionResultChan(ch chan MessageInjectionResult) func(o *Opti
}
}

// WithCompactionThreshold sets the token count threshold that triggers automatic
// conversation compaction. When total tokens in the response >= threshold,
// the conversation will be compacted to stay within the limit.
// Set to 0 (default) to disable automatic compaction.
func WithCompactionThreshold(threshold int) func(o *Options) {
return func(o *Options) {
o.compactionThreshold = threshold
}
}

// WithCompactionKeepMessages sets the number of recent messages to keep after
// compaction. Default is 10. This only applies when WithCompactionThreshold is set.
func WithCompactionKeepMessages(count int) func(o *Options) {
return func(o *Options) {
o.compactionKeepMessages = count
}
}

type defaultSinkStateTool struct{}

func (d *defaultSinkStateTool) Execute(args map[string]any) (string, any, error) {
Expand Down
6 changes: 3 additions & 3 deletions plan.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func applyPlanFromPrompt(llm LLM, o *Options, planPrompt string, feedbackConv *F
multimedias = feedbackConv.Multimedia
}
planConv := NewEmptyFragment().AddMessage("user", planPrompt, multimedias...)
reasoningPlan, err := llm.Ask(o.context, planConv)
reasoningPlan, _, err := llm.Ask(o.context, planConv)
if err != nil {
return nil, fmt.Errorf("failed to ask LLM for plan identification: %w", err)
}
Expand Down Expand Up @@ -165,7 +165,7 @@ func ExtractTODOs(llm LLM, plan *structures.Plan, goal *structures.Goal, opts ..
}

todoConv := NewEmptyFragment().AddMessage("user", promptStr)
reasoningTodo, err := llm.Ask(o.context, todoConv)
reasoningTodo, _, err := llm.Ask(o.context, todoConv)
if err != nil {
return nil, fmt.Errorf("failed to ask LLM for TODO generation: %w", err)
}
Expand Down Expand Up @@ -518,7 +518,7 @@ func executeReviewPhase(reviewerLLMs []LLM, workFragment Fragment, goal *structu
}

// Get the reasoning from the review
reviewResult, err := reviewerLLM.Ask(o.context, reviewFragment)
reviewResult, _, err := reviewerLLM.Ask(o.context, reviewFragment)
if err != nil {
return NewEmptyFragment(), false, fmt.Errorf("failed to get review result: %w", err)
}
Expand Down
Loading