diff --git a/internal/app/app.go b/internal/app/app.go index c6b2e0a3..44c3e4dd 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -169,6 +169,65 @@ func (a *App) GetTreeSession() *session.TreeManager { return a.opts.TreeSession } +// CompactConversation summarises older messages to free context space. It +// returns an error synchronously if compaction cannot start (agent busy or +// app closed). The actual compaction runs in a background goroutine and +// delivers CompactCompleteEvent or CompactErrorEvent through the registered +// tea.Program. customInstructions is optional text appended to the summary +// prompt (e.g. "Focus on the API design decisions"). +// +// Satisfies ui.AppController. +func (a *App) CompactConversation(customInstructions string) error { + a.mu.Lock() + if a.closed { + a.mu.Unlock() + return fmt.Errorf("app is closed") + } + if a.busy { + a.mu.Unlock() + return fmt.Errorf("cannot compact while the agent is working") + } + if a.opts.Kit == nil { + a.mu.Unlock() + return fmt.Errorf("SDK instance not available") + } + a.busy = true + a.wg.Add(1) + a.mu.Unlock() + + go func() { + defer a.wg.Done() + defer func() { + a.mu.Lock() + a.busy = false + a.mu.Unlock() + }() + + result, err := a.opts.Kit.Compact(a.rootCtx, nil, customInstructions) + if err != nil { + a.sendEvent(CompactErrorEvent{Err: err}) + return + } + if result == nil { + a.sendEvent(CompactErrorEvent{Err: fmt.Errorf("nothing to compact")}) + return + } + + // Sync in-memory store with the compacted session. 
+ if a.opts.TreeSession != nil { + a.store.Replace(a.opts.TreeSession.GetFantasyMessages()) + } + + a.sendEvent(CompactCompleteEvent{ + Summary: result.Summary, + OriginalTokens: result.OriginalTokens, + CompactedTokens: result.CompactedTokens, + MessagesRemoved: result.MessagesRemoved, + }) + }() + return nil +} + // -------------------------------------------------------------------------- // Non-interactive execution // -------------------------------------------------------------------------- diff --git a/internal/app/events.go b/internal/app/events.go index 29479d95..7e200270 100644 --- a/internal/app/events.go +++ b/internal/app/events.go @@ -94,6 +94,25 @@ type MessageCreatedEvent struct { Message fantasy.Message } +// CompactCompleteEvent is sent when a /compact operation finishes successfully. +// It carries the summary text and before/after statistics. +type CompactCompleteEvent struct { + // Summary is the LLM-generated structured summary of the compacted messages. + Summary string + // OriginalTokens is the estimated token count before compaction. + OriginalTokens int + // CompactedTokens is the estimated token count after compaction. + CompactedTokens int + // MessagesRemoved is the number of messages that were summarised away. + MessagesRemoved int +} + +// CompactErrorEvent is sent when a /compact operation fails. +type CompactErrorEvent struct { + // Err is the error that caused compaction to fail. + Err error +} + // ExtensionPrintEvent is sent when an extension calls ctx.Print, ctx.PrintInfo, // ctx.PrintError, or ctx.PrintBlock. 
The TUI renders it via the appropriate // renderer and tea.Println (scrollback); the CLI handler uses diff --git a/internal/compaction/compaction.go b/internal/compaction/compaction.go index 0d17e972..fec92423 100644 --- a/internal/compaction/compaction.go +++ b/internal/compaction/compaction.go @@ -1,5 +1,10 @@ // Package compaction provides context window management with token estimation, // compaction triggers, and LLM-based conversation summarization. +// +// The algorithm mirrors Pi's approach: preserve a token budget of recent +// messages (KeepRecentTokens, default 20 000) rather than a fixed message +// count. Auto-compaction fires when estimated context usage exceeds +// contextWindow − ReserveTokens. package compaction import ( @@ -10,36 +15,55 @@ import ( "charm.land/fantasy" ) +// --------------------------------------------------------------------------- +// Token estimation +// --------------------------------------------------------------------------- + // EstimateTokens provides a rough token count (~4 chars per token). func EstimateTokens(text string) int { return len(text) / 4 } -// EstimateMessageTokens estimates total tokens across a slice of fantasy messages -// by summing the estimated tokens for every text part. +// EstimateMessageTokens estimates total tokens across a slice of fantasy +// messages by summing the estimated tokens for every text part. func EstimateMessageTokens(messages []fantasy.Message) int { total := 0 for _, msg := range messages { - for _, part := range msg.Content { - if tp, ok := part.(fantasy.TextPart); ok { - total += EstimateTokens(tp.Text) - } + total += estimateSingleMessageTokens(msg) + } + return total +} + +// estimateSingleMessageTokens returns the estimated token count for one +// message. 
+func estimateSingleMessageTokens(msg fantasy.Message) int { + total := 0 + for _, part := range msg.Content { + if tp, ok := part.(fantasy.TextPart); ok { + total += EstimateTokens(tp.Text) } } return total } -// ShouldCompact reports whether the conversation exceeds the threshold -// percentage of the context limit. thresholdPct should be in the range 0.0–1.0 -// (e.g. 0.8 means 80%). -func ShouldCompact(messages []fantasy.Message, contextLimit int, thresholdPct float64) bool { - if contextLimit <= 0 || thresholdPct <= 0 { +// --------------------------------------------------------------------------- +// Auto-compact trigger +// --------------------------------------------------------------------------- + +// ShouldCompact reports whether auto-compaction should fire. It uses Pi's +// formula: contextTokens > contextWindow − reserveTokens. +func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool { + if contextWindow <= 0 || reserveTokens <= 0 { return false } estimated := EstimateMessageTokens(messages) - return float64(estimated) >= float64(contextLimit)*thresholdPct + return estimated > contextWindow-reserveTokens } +// --------------------------------------------------------------------------- +// Options & defaults +// --------------------------------------------------------------------------- + // CompactionResult contains statistics from a compaction operation. type CompactionResult struct { Summary string // LLM-generated summary of compacted messages @@ -48,74 +72,170 @@ type CompactionResult struct { MessagesRemoved int // Number of messages replaced by the summary } -// CompactionOptions configures compaction behaviour. +// CompactionOptions configures compaction behaviour. Pi-style token-based +// defaults are applied for zero-value fields. 
type CompactionOptions struct { - ContextLimit int // Model's context window size (tokens) - ThresholdPct float64 // Trigger threshold (0.0–1.0), default 0.8 - PreserveRecent int // Number of recent messages to keep, default 10 - SummaryPrompt string // Custom summary prompt (empty = use default) + ContextWindow int // Model's context window size (tokens) + ReserveTokens int // Tokens to reserve for LLM response, default 16384 + KeepRecentTokens int // Recent tokens to preserve (not summarised), default 20000 + SummaryPrompt string // Custom summary prompt (empty = use default) } -// defaults fills zero-value fields with sensible defaults. +// defaults fills zero-value fields with sensible Pi-style defaults. func (o *CompactionOptions) defaults() { - if o.ThresholdPct <= 0 { - o.ThresholdPct = 0.8 + if o.ReserveTokens <= 0 { + o.ReserveTokens = 16384 } - if o.PreserveRecent <= 0 { - o.PreserveRecent = 10 + if o.KeepRecentTokens <= 0 { + o.KeepRecentTokens = 20000 } } -// defaultSummaryPrompt is the system prompt used to summarise older messages. -const defaultSummaryPrompt = `You are a conversation summarizer. Summarize the following conversation messages into a concise summary that preserves: -1. Key decisions and conclusions reached -2. Important context and facts established -3. Current task state and progress -4. Any pending actions or open questions +// defaultSystemPrompt is the system prompt sent to the summarisation LLM. +// Matches Pi's compaction system prompt. +const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified. -Be concise but thorough. Output only the summary text, no preamble.` +Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.` -// FindCutPoint determines the index at which to cut messages for compaction. 
-// Messages before the cut point will be summarised; messages from the cut -// point onward are preserved. Returns 0 if no compaction is needed. -func FindCutPoint(messages []fantasy.Message, preserveRecent int) int { - if preserveRecent <= 0 { - preserveRecent = 10 - } - if len(messages) <= preserveRecent { - return 0 // not enough messages to compact - } - return len(messages) - preserveRecent +// defaultSummaryPrompt is the user prompt appended after the serialised +// conversation. Matches Pi's initial-compaction format. +const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work. + +Use this EXACT format: + +## Goal +[What is the user trying to accomplish? Can be multiple items if the session covers different tasks.] + +## Constraints & Preferences +- [Any constraints, preferences, or requirements mentioned by user] +- [Or "(none)" if none were mentioned] + +## Progress +### Done +- [x] [Completed tasks/changes] + +### In Progress +- [ ] [Current work] + +### Blocked +- [Issues preventing progress, if any] + +## Key Decisions +- **[Decision]**: [Brief rationale] + +## Next Steps +1. [Ordered list of what should happen next] + +## Critical Context +- [Any data, examples, or references needed to continue] +- [Or "(none)" if not applicable] + +Keep each section concise. Preserve exact file paths, function names, and error messages.` + +// --------------------------------------------------------------------------- +// Cut point (token-based, Pi-style) +// --------------------------------------------------------------------------- + +// isValidCutPoint returns true if the message at index i is a valid place to +// split the conversation. Tool-role messages (tool results) must stay with +// their preceding assistant tool-call, so they are never valid cut points. 
+func isValidCutPoint(msg fantasy.Message) bool { + return msg.Role != fantasy.MessageRoleTool } -// Compact summarises older messages using the LLM, returning the compaction -// result and a new message slice (summary message + preserved recent messages). +// FindCutPoint walks backward from the end of messages, accumulating tokens +// until the keepRecentTokens budget is filled. Returns the index that +// separates "old" messages (0..cutPoint-1, to be summarised) from "recent" +// messages (cutPoint..end, to be preserved). // -// The model parameter is the same fantasy.LanguageModel used for regular -// generation — compaction creates a disposable fantasy agent with no tools to -// produce the summary. -func Compact( - ctx context.Context, - model fantasy.LanguageModel, - messages []fantasy.Message, - opts CompactionOptions, -) (*CompactionResult, []fantasy.Message, error) { - opts.defaults() - - cutPoint := FindCutPoint(messages, opts.PreserveRecent) - if cutPoint == 0 { - return nil, messages, nil // nothing to compact +// Returns 0 if there are fewer than 2 messages or all messages fit within +// the keep budget. +func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int { + if len(messages) < 2 { + return 0 + } + if keepRecentTokens <= 0 { + keepRecentTokens = 20000 } - oldMessages := messages[:cutPoint] - recentMessages := messages[cutPoint:] - originalTokens := EstimateMessageTokens(messages) + accumulated := 0 - // Build a textual representation of the messages to summarise. + for i := len(messages) - 1; i >= 0; i-- { + accumulated += estimateSingleMessageTokens(messages[i]) + if accumulated > keepRecentTokens { + cut := i + 1 + + // If the last message alone exceeds the budget, keep it + // anyway and summarise everything before it. + if cut >= len(messages) { + cut = len(messages) - 1 + } + + // Land on a valid cut point — scan forward past tool-result + // messages (they must stay with their preceding tool call). 
+ for cut < len(messages) && !isValidCutPoint(messages[cut]) { + cut++ + } + if cut >= len(messages) { + return 0 + } + + // Need at least 2 messages before the cut to produce a + // meaningful summary. + if cut < 2 { + return 0 + } + return cut + } + } + + // All messages fit within the budget — nothing to compact. + return 0 +} + +// forceCutPoint returns a cut point that keeps only the last non-tool +// message, summarising everything before it. Used when the budget-based +// FindCutPoint returns 0 but the caller wants to compact anyway (manual +// /compact). Returns 0 if no valid cut exists. +func forceCutPoint(messages []fantasy.Message) int { + // Walk backward to find the last valid (non-tool) message boundary. + for i := len(messages) - 1; i >= 2; i-- { + if isValidCutPoint(messages[i]) { + return i + } + } + return 0 +} + +// --------------------------------------------------------------------------- +// Message serialisation (Pi-style) +// --------------------------------------------------------------------------- + +// roleLabel returns a human-readable label for a fantasy message role, +// matching Pi's serialisation format. +func roleLabel(role fantasy.MessageRole) string { + switch role { + case fantasy.MessageRoleUser: + return "[User]" + case fantasy.MessageRoleAssistant: + return "[Assistant]" + case fantasy.MessageRoleTool: + return "[Tool result]" + case fantasy.MessageRoleSystem: + return "[System]" + default: + return "[" + string(role) + "]" + } +} + +// serializeMessages converts a slice of fantasy messages into a plain-text +// representation suitable for sending to the summarisation LLM. The format +// mirrors Pi's compaction serialisation. 
+func serializeMessages(messages []fantasy.Message) string { var sb strings.Builder - for _, msg := range oldMessages { - sb.WriteString(string(msg.Role)) - sb.WriteString(": ") + for _, msg := range messages { + sb.WriteString(roleLabel(msg.Role)) + sb.WriteString(":\n") for _, part := range msg.Content { if tp, ok := part.(fantasy.TextPart); ok { sb.WriteString(tp.Text) @@ -123,20 +243,70 @@ func Compact( } sb.WriteString("\n\n") } - conversationText := sb.String() + return sb.String() +} - // Use the provided (or default) summary prompt. - summaryPrompt := opts.SummaryPrompt - if summaryPrompt == "" { - summaryPrompt = defaultSummaryPrompt +// --------------------------------------------------------------------------- +// Compact +// --------------------------------------------------------------------------- + +// Compact summarises older messages using the LLM, returning the compaction +// result and a new message slice (summary message + preserved recent +// messages). +// +// The model parameter is the same fantasy.LanguageModel used for regular +// generation — compaction creates a disposable fantasy agent with no tools to +// produce the summary. +// +// customInstructions is optional text appended to the summary prompt (e.g. +// "Focus on the API design decisions"). Pass "" to use the default prompt +// only. +func Compact( + ctx context.Context, + model fantasy.LanguageModel, + messages []fantasy.Message, + opts CompactionOptions, + customInstructions string, +) (*CompactionResult, []fantasy.Message, error) { + opts.defaults() + + if len(messages) < 2 { + return nil, messages, nil + } + + cutPoint := FindCutPoint(messages, opts.KeepRecentTokens) + if cutPoint == 0 { + // All messages fit within the keep budget. Force a cut that + // keeps only the last non-tool message — matching Pi, which + // always compacts when the user explicitly requests it. 
+ cutPoint = forceCutPoint(messages) + if cutPoint == 0 { + return nil, messages, nil + } + } + + oldMessages := messages[:cutPoint] + recentMessages := messages[cutPoint:] + originalTokens := EstimateMessageTokens(messages) + + // Serialise old messages to text, matching Pi's format. + conversationText := serializeMessages(oldMessages) + + // Build the user-facing prompt: conversation text + summary instructions. + userPrompt := opts.SummaryPrompt + if userPrompt == "" { + userPrompt = defaultSummaryPrompt + } + if customInstructions != "" { + userPrompt += "\n\nAdditional instructions: " + customInstructions } // Create a lightweight agent (no tools) just for summarisation. summaryAgent := fantasy.NewAgent(model, - fantasy.WithSystemPrompt(summaryPrompt), + fantasy.WithSystemPrompt(defaultSystemPrompt), ) result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{ - Prompt: conversationText, + Prompt: conversationText + "\n\n" + userPrompt, }) if err != nil { return nil, nil, fmt.Errorf("compaction summarisation failed: %w", err) diff --git a/internal/compaction/compaction_test.go b/internal/compaction/compaction_test.go index ebdeaa14..d54b4ed9 100644 --- a/internal/compaction/compaction_test.go +++ b/internal/compaction/compaction_test.go @@ -2,6 +2,7 @@ package compaction import ( "context" + "strings" "testing" "charm.land/fantasy" @@ -14,6 +15,16 @@ func makeTextMessage(role fantasy.MessageRole, text string) fantasy.Message { } } +// makeTextMessageN creates a message whose text is exactly n characters long +// (≈ n/4 estimated tokens). 
+func makeTextMessageN(role fantasy.MessageRole, n int) fantasy.Message { + return makeTextMessage(role, strings.Repeat("a", n)) +} + +// --------------------------------------------------------------------------- +// Token estimation +// --------------------------------------------------------------------------- + func TestEstimateTokens(t *testing.T) { tests := []struct { text string @@ -51,29 +62,30 @@ func TestEstimateMessageTokens_Empty(t *testing.T) { } } +// --------------------------------------------------------------------------- +// ShouldCompact (Pi-style: contextTokens > contextWindow - reserveTokens) +// --------------------------------------------------------------------------- + func TestShouldCompact(t *testing.T) { // Create messages that total ~100 tokens (400 chars). - longText := make([]byte, 400) - for i := range longText { - longText[i] = 'a' - } - msgs := []fantasy.Message{makeTextMessage(fantasy.MessageRoleUser, string(longText))} + msgs := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)} tests := []struct { - name string - contextLimit int - threshold float64 - want bool + name string + contextWindow int + reserveTokens int + want bool }{ - {"above threshold", 120, 0.8, true}, // 100 >= 120*0.8=96 - {"below threshold", 200, 0.8, false}, // 100 < 200*0.8=160 - {"zero limit", 0, 0.8, false}, // no limit - {"zero threshold", 200, 0.0, false}, // no threshold - {"exactly at threshold", 125, 0.8, true}, // 100 >= 125*0.8=100 + {"above threshold", 110, 16, true}, // 100 > 110-16=94 → true + {"below threshold", 200, 16, false}, // 100 > 200-16=184 → false + {"zero window", 0, 16, false}, // no window + {"zero reserve", 200, 0, false}, // no reserve + {"exactly at threshold", 116, 16, false}, // 100 > 116-16=100 → false (not >) + {"one over", 115, 16, true}, // 100 > 115-16=99 → true } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := ShouldCompact(msgs, tt.contextLimit, tt.threshold) + got := 
ShouldCompact(msgs, tt.contextWindow, tt.reserveTokens) if got != tt.want { t.Errorf("ShouldCompact() = %v, want %v", got, tt.want) } @@ -81,26 +93,41 @@ func TestShouldCompact(t *testing.T) { } } -func TestFindCutPoint(t *testing.T) { - msgs := make([]fantasy.Message, 20) +// --------------------------------------------------------------------------- +// FindCutPoint (token-based, Pi-style) +// --------------------------------------------------------------------------- + +func TestFindCutPoint_TokenBased(t *testing.T) { + // Each message is 400 chars = ~100 tokens. + msgs := make([]fantasy.Message, 10) for i := range msgs { - msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "msg") + if i%2 == 0 { + msgs[i] = makeTextMessageN(fantasy.MessageRoleUser, 400) + } else { + msgs[i] = makeTextMessageN(fantasy.MessageRoleAssistant, 400) + } } tests := []struct { - name string - preserveRecent int - want int + name string + keepRecentTokens int + want int // expected cut point }{ - {"preserve 10", 10, 10}, - {"preserve 5", 5, 15}, - {"preserve all", 20, 0}, - {"preserve more than total", 25, 0}, - {"preserve 0 uses default 10", 0, 10}, + // keepRecentTokens=250 → walk back: msg[9]=100, msg[8]=200 ≤ 250, + // msg[7]=300 > 250 → cut = 8. + {"keep 250 tokens", 250, 8}, + // keepRecentTokens=500 → walk back 5 msgs = 500 ≤ 500, + // 6th msg = 600 > 500 → cut = 5. + {"keep 500 tokens", 500, 5}, + // keepRecentTokens=1000 → all 10 msgs = 1000, not exceeded → cut = 0. + {"keep all", 1000, 0}, + // keepRecentTokens=50 → msg[9] alone = 100 > 50 → cut = 10, + // exceeds len → clamped to 9. 9 ≥ 2 → valid. 
+ {"keep very few", 50, 9}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := FindCutPoint(msgs, tt.preserveRecent) + got := FindCutPoint(msgs, tt.keepRecentTokens) if got != tt.want { t.Errorf("FindCutPoint() = %d, want %d", got, tt.want) } @@ -108,38 +135,115 @@ func TestFindCutPoint(t *testing.T) { } } +func TestFindCutPoint_TooFewMessages(t *testing.T) { + msgs := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), + } + got := FindCutPoint(msgs, 50) + if got != 0 { + t.Errorf("FindCutPoint(1 msg) = %d, want 0", got) + } +} + +func TestFindCutPoint_SkipsToolResults(t *testing.T) { + // [user, assistant, tool, user, assistant] + // Each 400 chars = 100 tokens. keepRecentTokens=150 → walk back: + // msg[4] (assistant) = 100 ≤ 150 + // msg[3] (user) = 200 > 150 → raw cut at 4, but check validity. + // msg[4] is assistant → valid cut point. Cut = 4. + msgs := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + makeTextMessageN(fantasy.MessageRoleTool, 400), + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + } + got := FindCutPoint(msgs, 150) + if got != 4 { + t.Errorf("FindCutPoint() = %d, want 4", got) + } + + // Now test a conversation that contains consecutive tool results. + // [user, assistant, tool, tool, user] + // With keepRecentTokens=50 the last message alone exceeds the budget: + // i=4, accumulated=100 > 50, so the raw cut is i+1 = 5 = len(msgs). + // FindCutPoint clamps that to len(msgs)-1 = 4 (keep the last message + // anyway); msg[4] is user → valid and 4 ≥ 2, so the result would be + // 4, not 0. That case is not asserted here. + + // Asserted case: keepRecentTokens=150 → walk back: + // msg[4] (user) = 100 ≤ 150 + // msg[3] (tool) = 200 > 150 → cut at 4, msg[4] is user → valid. 
+ msgs2 := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + makeTextMessageN(fantasy.MessageRoleTool, 400), + makeTextMessageN(fantasy.MessageRoleTool, 400), + makeTextMessageN(fantasy.MessageRoleUser, 400), + } + got2 := FindCutPoint(msgs2, 150) + if got2 != 4 { + t.Errorf("FindCutPoint(tool results) = %d, want 4", got2) + } + + // NOTE(review): despite the "scan forward" label below, this budget puts + // the raw cut just past the tool results, so no forward scan happens; + // a budget of 350 would land on tool(3) and exercise that path. + // [user(0), assistant(1), tool(2), tool(3), user(4), assistant(5)] + // keepRecentTokens=250 → walk back: msg[5]=100, msg[4]=200 (both ≤ 250), + // msg[3] = 300 > 250 → cut at 4, msg[4] is user → valid. + msgs3 := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + makeTextMessageN(fantasy.MessageRoleTool, 400), + makeTextMessageN(fantasy.MessageRoleTool, 400), + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + } + got3 := FindCutPoint(msgs3, 250) + if got3 != 4 { + t.Errorf("FindCutPoint(scan forward) = %d, want 4", got3) + } +} + +// --------------------------------------------------------------------------- +// CompactionOptions defaults +// --------------------------------------------------------------------------- + func TestCompactionOptions_Defaults(t *testing.T) { opts := CompactionOptions{} opts.defaults() - if opts.ThresholdPct != 0.8 { - t.Errorf("ThresholdPct = %f, want 0.8", opts.ThresholdPct) + if opts.ReserveTokens != 16384 { + t.Errorf("ReserveTokens = %d, want 16384", opts.ReserveTokens) } - if opts.PreserveRecent != 10 { - t.Errorf("PreserveRecent = %d, want 10", opts.PreserveRecent) + if opts.KeepRecentTokens != 20000 { + t.Errorf("KeepRecentTokens = %d, want 20000", opts.KeepRecentTokens) } } func TestCompactionOptions_DefaultsPreservesExisting(t *testing.T) { - opts := CompactionOptions{ThresholdPct: 0.9, PreserveRecent: 5} + opts := 
CompactionOptions{ReserveTokens: 8192, KeepRecentTokens: 10000} opts.defaults() - if opts.ThresholdPct != 0.9 { - t.Errorf("ThresholdPct = %f, want 0.9", opts.ThresholdPct) + if opts.ReserveTokens != 8192 { + t.Errorf("ReserveTokens = %d, want 8192", opts.ReserveTokens) } - if opts.PreserveRecent != 5 { - t.Errorf("PreserveRecent = %d, want 5", opts.PreserveRecent) + if opts.KeepRecentTokens != 10000 { + t.Errorf("KeepRecentTokens = %d, want 10000", opts.KeepRecentTokens) } } +// --------------------------------------------------------------------------- +// Compact (integration — too few messages) +// --------------------------------------------------------------------------- + func TestCompact_TooFewMessages(t *testing.T) { - msgs := make([]fantasy.Message, 5) - for i := range msgs { - msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "short") + msgs := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), } - // Default preserveRecent = 10, so 5 messages is too few. - result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}) + result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "") if err != nil { t.Fatalf("unexpected error: %v", err) } @@ -150,3 +254,22 @@ func TestCompact_TooFewMessages(t *testing.T) { t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs)) } } + +func TestCompact_WithinBudget(t *testing.T) { + // 2 messages, each 100 tokens, keepRecentTokens=20000 → all fit. 
+ msgs := []fantasy.Message{ + makeTextMessageN(fantasy.MessageRoleUser, 400), + makeTextMessageN(fantasy.MessageRoleAssistant, 400), + } + + result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if result != nil { + t.Error("expected nil result when all messages fit within budget") + } + if len(newMsgs) != len(msgs) { + t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs)) + } +} diff --git a/internal/ui/commands.go b/internal/ui/commands.go index baee2bc3..a0869710 100644 --- a/internal/ui/commands.go +++ b/internal/ui/commands.go @@ -59,6 +59,12 @@ var SlashCommands = []SlashCommand{ Category: "System", Aliases: []string{"/cq"}, }, + { + Name: "/compact", + Description: "Summarise older messages to free context space", + Category: "System", + Aliases: []string{"/co"}, + }, { Name: "/quit", Description: "Exit the application", diff --git a/internal/ui/model.go b/internal/ui/model.go index 148b3f77..48fc9f5b 100644 --- a/internal/ui/model.go +++ b/internal/ui/model.go @@ -50,6 +50,12 @@ type AppController interface { ClearQueue() // ClearMessages clears the conversation history. ClearMessages() + // CompactConversation summarises older messages to free context space. + // Runs asynchronously; results are delivered via CompactCompleteEvent or + // CompactErrorEvent sent through the registered tea.Program. Returns an + // error synchronously if compaction cannot be started (e.g. agent is busy). + // customInstructions is optional text appended to the summary prompt. + CompactConversation(customInstructions string) error // GetTreeSession returns the tree session manager, or nil if tree sessions // are not enabled. Used by slash commands like /tree, /fork, /session. GetTreeSession() *session.TreeManager @@ -497,6 +503,17 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { return m, tea.Batch(cmds...) 
} + // /compact supports optional args: "/compact Focus on API decisions". + // GetCommandByName won't match the full text, so check the prefix. + if name, args, ok := strings.Cut(msg.Text, " "); ok { + if sc := GetCommandByName(name); sc != nil && sc.Name == "/compact" { + if cmd := m.handleCompactCommand(strings.TrimSpace(args)); cmd != nil { + cmds = append(cmds, cmd) + } + return m, tea.Batch(cmds...) + } + } + // Check extension-registered slash commands. These support arguments // (e.g. "/sub list files"), so we split on the first space. if cmd := m.handleExtensionCommand(msg.Text); cmd != nil { @@ -636,6 +653,20 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.state = stateInput m.canceling = false + case app.CompactCompleteEvent: + if m.stream != nil { + m.stream.Reset() + } + m.state = stateInput + cmds = append(cmds, m.printCompactResult(msg)) + + case app.CompactErrorEvent: + if m.stream != nil { + m.stream.Reset() + } + m.state = stateInput + cmds = append(cmds, m.printSystemMessage(fmt.Sprintf("Compaction failed: %v", msg.Err))) + case app.ExtensionPrintEvent: // Extension output — route through styled renderers when a level is set. 
switch msg.Level { @@ -870,6 +901,8 @@ func (m *AppModel) handleSlashCommand(sc *SlashCommand) tea.Cmd { return m.printUsageMessage() case "/reset-usage": return m.printResetUsage() + case "/compact": + return m.handleCompactCommand("") case "/clear": if m.appCtrl != nil { m.appCtrl.ClearMessages() @@ -987,6 +1020,7 @@ func (m *AppModel) printHelpMessage() tea.Cmd { "- `/fork`: Branch from an earlier message\n" + "- `/new`: Start a new branch (preserves history)\n\n" + "**System:**\n" + + "- `/compact [instructions]`: Summarise older messages to free context space\n" + "- `/clear`: Clear message history\n" + "- `/reset-usage`: Reset usage statistics\n" + "- `/quit`: Exit the application\n\n" @@ -1080,6 +1114,54 @@ func (m *AppModel) printResetUsage() tea.Cmd { return m.printSystemMessage("Usage statistics have been reset.") } +// handleCompactCommand starts an async compaction. It returns a tea.Cmd that +// prints a "compacting..." message and transitions to the working state. If +// the app controller rejects the request (busy, closed) it prints an error +// instead. customInstructions is optional text appended to the summary +// prompt (e.g. "Focus on the API design decisions"). +func (m *AppModel) handleCompactCommand(customInstructions string) tea.Cmd { + if m.appCtrl == nil { + return m.printSystemMessage("Compaction is not available.") + } + if err := m.appCtrl.CompactConversation(customInstructions); err != nil { + return m.printSystemMessage(fmt.Sprintf("Cannot compact: %v", err)) + } + // Transition to working state so the spinner shows while compaction runs. + m.state = stateWorking + var spinnerCmd tea.Cmd + if m.stream != nil { + _, spinnerCmd = m.stream.Update(app.SpinnerEvent{Show: true}) + } + return tea.Batch(m.printSystemMessage("Compacting conversation..."), spinnerCmd) +} + +// printCompactResult renders the compaction summary in a styled block with +// a distinct border color and a stats subtitle. 
+func (m *AppModel) printCompactResult(evt app.CompactCompleteEvent) tea.Cmd { + theme := GetTheme() + + saved := evt.OriginalTokens - evt.CompactedTokens + subtitle := fmt.Sprintf( + "%d messages summarised, ~%dk tokens freed (%dk -> %dk)", + evt.MessagesRemoved, saved/1000, evt.OriginalTokens/1000, evt.CompactedTokens/1000, + ) + + content := evt.Summary + if subtitle != "" { + sub := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render(" " + subtitle) + content = strings.TrimSuffix(content, "\n") + "\n\n" + sub + } + + rendered := renderContentBlock( + content, + m.width, + WithAlign(lipgloss.Left), + WithBorderColor(theme.Secondary), + WithMarginBottom(1), + ) + return tea.Println(rendered) +} + // flushStreamContent gets the rendered content from the stream component, // emits it above the BT region via tea.Println, and resets the stream. This // is called before printing tool calls (streaming completes before tools fire) diff --git a/internal/ui/model_test.go b/internal/ui/model_test.go index 27ce7c4f..8422b112 100644 --- a/internal/ui/model_test.go +++ b/internal/ui/model_test.go @@ -45,6 +45,10 @@ func (s *stubAppController) ClearMessages() { s.clearMsgCalled++ } +func (s *stubAppController) CompactConversation(_ string) error { + return nil +} + func (s *stubAppController) GetTreeSession() *session.TreeManager { return nil } diff --git a/pkg/kit/compaction.go b/pkg/kit/compaction.go index da2dd26a..ea6fb786 100644 --- a/pkg/kit/compaction.go +++ b/pkg/kit/compaction.go @@ -23,21 +23,22 @@ func (m *Kit) EstimateContextTokens() int { } // ShouldCompact reports whether the conversation is near the model's context -// limit and should be compacted. Returns false if the model's context limit -// is unknown or if no compaction options are configured. +// limit and should be compacted. Uses Pi's formula: +// contextTokens > contextWindow − reserveTokens. +// Returns false if the model's context limit is unknown. 
func (m *Kit) ShouldCompact() bool { info := m.GetModelInfo() if info == nil || info.Limit.Context <= 0 { return false } - threshold := 0.8 - if m.compactionOpts != nil && m.compactionOpts.ThresholdPct > 0 { - threshold = m.compactionOpts.ThresholdPct + reserveTokens := 16384 + if m.compactionOpts != nil && m.compactionOpts.ReserveTokens > 0 { + reserveTokens = m.compactionOpts.ReserveTokens } messages := m.treeSession.GetFantasyMessages() - return compaction.ShouldCompact(messages, info.Limit.Context, threshold) + return compaction.ShouldCompact(messages, info.Limit.Context, reserveTokens) } // GetContextStats returns current context usage statistics including @@ -61,13 +62,16 @@ func (m *Kit) GetContextStats() ContextStats { } // Compact summarises older messages to reduce context usage. If opts is nil, -// the instance's CompactionOptions (or sensible defaults) are used. The model's -// context limit is automatically populated from the model registry when -// opts.ContextLimit is 0. +// the instance's CompactionOptions (or sensible defaults) are used. The +// model's context window is automatically populated from the model registry +// when opts.ContextWindow is 0. // -// After compaction, the tree session is cleared and replaced with the compacted -// messages (summary + preserved recent messages). -func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*CompactionResult, error) { +// customInstructions is optional text appended to the summary prompt (e.g. +// "Focus on the API design decisions"). Pass "" for the default prompt. +// +// After compaction, the tree session is cleared and replaced with the +// compacted messages (summary + preserved recent messages). 
+func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions, customInstructions string) (*CompactionResult, error) { if opts == nil { if m.compactionOpts != nil { opts = m.compactionOpts @@ -76,25 +80,24 @@ func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*Compaction } } - // Auto-populate context limit from model info if not set. - if opts.ContextLimit <= 0 { + // Auto-populate context window from model info if not set. + if opts.ContextWindow <= 0 { if info := m.GetModelInfo(); info != nil { - opts.ContextLimit = info.Limit.Context + opts.ContextWindow = info.Limit.Context } } messages := m.treeSession.GetFantasyMessages() - if len(messages) == 0 { - return nil, fmt.Errorf("cannot compact: no messages in session") + if len(messages) < 2 { + return nil, fmt.Errorf("cannot compact: need at least 2 messages") } model := m.agent.GetModel() - result, newMessages, err := compaction.Compact(ctx, model, messages, *opts) + result, newMessages, err := compaction.Compact(ctx, model, messages, *opts, customInstructions) if err != nil { return nil, err } if result == nil { - // Nothing to compact (too few messages). return nil, nil } diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index 3f83695f..dc8aa4ae 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -540,7 +540,7 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr // Auto-compact if enabled and conversation is near the context limit. if m.autoCompact && m.ShouldCompact() { - _, _ = m.Compact(ctx, m.compactionOpts) // best-effort + _, _ = m.Compact(ctx, m.compactionOpts, "") // best-effort } // Build context from the tree so only the current branch is sent.