// Package compaction provides context window management with token estimation, // compaction triggers, and LLM-based conversation summarization. // // The algorithm preserves a token budget of recent // messages (KeepRecentTokens, default 20 000) rather than a fixed message // count. Auto-compaction fires when estimated context usage exceeds // contextWindow − ReserveTokens. // // Features modelled after pi's compaction system: // - Tool result truncation (2000 char max) during serialisation // - Split turn handling: when a single turn exceeds the keep budget, // the turn prefix is summarised separately and merged // - Cumulative file tracking: read and modified files extracted from // tool calls and carried forward across compactions package compaction import ( "context" "encoding/json" "fmt" "strings" "charm.land/fantasy" ) // --------------------------------------------------------------------------- // Token estimation // --------------------------------------------------------------------------- // estimateTokens provides a rough token count (~4 chars per token). func estimateTokens(text string) int { return len(text) / 4 } // EstimateMessageTokens estimates total tokens across a slice of fantasy // messages by summing the estimated tokens for every text part. func EstimateMessageTokens(messages []fantasy.Message) int { total := 0 for _, msg := range messages { total += estimateSingleMessageTokens(msg) } return total } // estimateSingleMessageTokens returns the estimated token count for one // message. func estimateSingleMessageTokens(msg fantasy.Message) int { total := 0 for _, part := range msg.Content { if tp, ok := part.(fantasy.TextPart); ok { total += estimateTokens(tp.Text) } } return total } // --------------------------------------------------------------------------- // Auto-compact trigger // --------------------------------------------------------------------------- // ShouldCompact reports whether auto-compaction should fire. // Formula: contextTokens > contextWindow − reserveTokens. func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool { if contextWindow <= 0 || reserveTokens <= 0 { return false } estimated := EstimateMessageTokens(messages) return estimated > contextWindow-reserveTokens } // --------------------------------------------------------------------------- // Options & defaults // --------------------------------------------------------------------------- // CompactionResult contains statistics from a compaction operation. type CompactionResult struct { Summary string // LLM-generated summary of compacted messages OriginalTokens int // Estimated token count before compaction CompactedTokens int // Estimated token count after compaction MessagesRemoved int // Number of messages replaced by the summary CutPoint int // Index in the original messages where the cut was made ReadFiles []string // Files read during the compacted conversation ModifiedFiles []string // Files modified during the compacted conversation } // CompactionOptions configures compaction behaviour. Token-based defaults // are applied for zero-value fields. type CompactionOptions struct { ContextWindow int // Model's context window size (tokens) ReserveTokens int // Tokens to reserve for LLM response, default 16384 KeepRecentTokens int // Recent tokens to preserve (not summarised), default 20000 SummaryPrompt string // Custom summary prompt (empty = use default) } // defaults fills zero-value fields with sensible defaults. func (o *CompactionOptions) defaults() { if o.ReserveTokens <= 0 { o.ReserveTokens = 16384 } if o.KeepRecentTokens <= 0 { o.KeepRecentTokens = 20000 } } // defaultSystemPrompt is the system prompt sent to the summarisation LLM. const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified. Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.` // defaultSummaryPrompt is the user prompt appended after the serialised // conversation. const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work. Use this EXACT format: ## Goal [What is the user trying to accomplish? Can be multiple items if the session covers different tasks.] ## Constraints & Preferences - [Any constraints, preferences, or requirements mentioned by user] - [Or "(none)" if none were mentioned] ## Progress ### Done - [x] [Completed tasks/changes] ### In Progress - [ ] [Current work] ### Blocked - [Issues preventing progress, if any] ## Key Decisions - **[Decision]**: [Brief rationale] ## Next Steps 1. [Ordered list of what should happen next] ## Critical Context - [Any data, examples, or references needed to continue] - [Or "(none)" if not applicable] [One file path per line for files that were read during the conversation] [One file path per line for files that were created, edited, or written during the conversation] Keep each section concise. Preserve exact file paths, function names, and error messages.` // --------------------------------------------------------------------------- // Tool result truncation // --------------------------------------------------------------------------- // maxToolResultChars is the maximum length of tool result text preserved // during serialisation. Longer results are truncated with a marker. const maxToolResultChars = 2000 // truncateToolResult truncates text to maxToolResultChars, appending a // marker indicating how many characters were removed. func truncateToolResult(text string) string { if len(text) <= maxToolResultChars { return text } truncated := len(text) - maxToolResultChars return text[:maxToolResultChars] + fmt.Sprintf("\n[...%d chars truncated]", truncated) } // --------------------------------------------------------------------------- // Cut point (token-based) // --------------------------------------------------------------------------- // isValidCutPoint returns true if the message at index i is a valid place to // split the conversation. Tool-role messages (tool results) must stay with // their preceding assistant tool-call, so they are never valid cut points. func isValidCutPoint(msg fantasy.Message) bool { return msg.Role != fantasy.MessageRoleTool } // findTurnStart returns the index of the user message that starts the turn // containing messages[idx]. A "turn" starts with a user message and includes // all subsequent assistant/tool messages until the next user message. func findTurnStart(messages []fantasy.Message, idx int) int { for i := idx; i >= 0; i-- { if messages[i].Role == fantasy.MessageRoleUser { return i } } return 0 } // FindCutPoint walks backward from the end of messages, accumulating tokens // until the keepRecentTokens budget is filled. Returns the index that // separates "old" messages (0..cutPoint-1, to be summarised) from "recent" // messages (cutPoint..end, to be preserved). // // The cut point prefers turn boundaries (user messages). When a single turn // exceeds the budget, the cut lands mid-turn (IsSplitTurn returns true). // // Returns 0 if there are fewer than 2 messages or all messages fit within // the keep budget. func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int { if len(messages) < 2 { return 0 } if keepRecentTokens <= 0 { keepRecentTokens = 20000 } accumulated := 0 for i := len(messages) - 1; i >= 0; i-- { accumulated += estimateSingleMessageTokens(messages[i]) if accumulated > keepRecentTokens { cut := i + 1 // If the last message alone exceeds the budget, keep it // anyway and summarise everything before it. if cut >= len(messages) { cut = len(messages) - 1 } // Land on a valid cut point — scan forward past tool-result // messages (they must stay with their preceding tool call). for cut < len(messages) && !isValidCutPoint(messages[cut]) { cut++ } if cut >= len(messages) { return 0 } // Need at least 2 messages before the cut to produce a // meaningful summary. if cut < 2 { return 0 } return cut } } // All messages fit within the budget — nothing to compact. return 0 } // IsSplitTurn returns true if the cut point lands in the middle of a turn // (i.e. the message at cutPoint is not a user message, meaning we're // splitting a single turn's assistant/tool messages). func IsSplitTurn(messages []fantasy.Message, cutPoint int) bool { if cutPoint <= 0 || cutPoint >= len(messages) { return false } // If the cut point is at a user message, it's a clean turn boundary. if messages[cutPoint].Role == fantasy.MessageRoleUser { return false } // Otherwise we're cutting mid-turn — check if the turn started before // the cut point. turnStart := findTurnStart(messages, cutPoint) return turnStart < cutPoint } // forceCutPoint returns a cut point that keeps only the last non-tool // message, summarising everything before it. Used when the budget-based // FindCutPoint returns 0 but the caller wants to compact anyway (manual // /compact). Returns 0 if no valid cut exists. func forceCutPoint(messages []fantasy.Message) int { // Walk backward to find the last valid (non-tool) message boundary. for i := len(messages) - 1; i >= 2; i-- { if isValidCutPoint(messages[i]) { return i } } return 0 } // --------------------------------------------------------------------------- // File tracking // --------------------------------------------------------------------------- // fileOps contains cumulative file operation tracking. type fileOps struct { ReadFiles map[string]bool ModifiedFiles map[string]bool } func newFileOps() *fileOps { return &fileOps{ ReadFiles: make(map[string]bool), ModifiedFiles: make(map[string]bool), } } // extractFileOps scans messages for tool calls and extracts file paths. // It recognises the built-in Kit tools: read, write, edit, bash, grep, find, ls. func extractFileOps(messages []fantasy.Message) *fileOps { ops := newFileOps() for _, msg := range messages { for _, part := range msg.Content { tc, ok := part.(fantasy.ToolCallPart) if !ok { continue } // Parse the JSON input to extract path arguments. var args map[string]any if err := json.Unmarshal([]byte(tc.Input), &args); err != nil { continue } path, _ := args["path"].(string) if path == "" { continue } switch tc.ToolName { case "read", "grep", "find", "ls": ops.ReadFiles[path] = true case "write", "edit": ops.ModifiedFiles[path] = true } } } return ops } // merge combines another fileOps into this one (for cumulative tracking). func (f *fileOps) merge(other *fileOps) { if other == nil { return } for k := range other.ReadFiles { f.ReadFiles[k] = true } for k := range other.ModifiedFiles { f.ModifiedFiles[k] = true } } // mergeSlices adds previously tracked file lists (from a prior compaction). func (f *fileOps) mergeSlices(readFiles, modifiedFiles []string) { for _, p := range readFiles { f.ReadFiles[p] = true } for _, p := range modifiedFiles { f.ModifiedFiles[p] = true } } // sortedKeys returns the keys of a bool map sorted alphabetically. func sortedKeys(m map[string]bool) []string { if len(m) == 0 { return nil } keys := make([]string, 0, len(m)) for k := range m { keys = append(keys, k) } // Simple sort — no need for sort package for small lists. for i := 0; i < len(keys); i++ { for j := i + 1; j < len(keys); j++ { if keys[j] < keys[i] { keys[i], keys[j] = keys[j], keys[i] } } } return keys } // --------------------------------------------------------------------------- // Message serialisation // --------------------------------------------------------------------------- // roleLabel returns a human-readable label for a fantasy message role. func roleLabel(role fantasy.MessageRole) string { switch role { case fantasy.MessageRoleUser: return "[User]" case fantasy.MessageRoleAssistant: return "[Assistant]" case fantasy.MessageRoleTool: return "[Tool result]" case fantasy.MessageRoleSystem: return "[System]" default: return "[" + string(role) + "]" } } // serializeMessages converts a slice of fantasy messages into a plain-text // representation suitable for sending to the summarisation LLM. Tool result // text is truncated to maxToolResultChars to keep the summarisation request // within reasonable token budgets. func serializeMessages(messages []fantasy.Message) string { var sb strings.Builder for _, msg := range messages { sb.WriteString(roleLabel(msg.Role)) sb.WriteString(":\n") for _, part := range msg.Content { switch p := part.(type) { case fantasy.TextPart: if msg.Role == fantasy.MessageRoleTool { sb.WriteString(truncateToolResult(p.Text)) } else { sb.WriteString(p.Text) } case fantasy.ToolCallPart: fmt.Fprintf(&sb, "[Tool call: %s(%s)]", p.ToolName, truncateToolResult(p.Input)) case fantasy.ReasoningPart: fmt.Fprintf(&sb, "[Thinking]: %s", truncateToolResult(p.Text)) } } sb.WriteString("\n\n") } return sb.String() } // --------------------------------------------------------------------------- // Compact // --------------------------------------------------------------------------- // PreviousCompaction carries file tracking state from a prior compaction so // that file operations accumulate across multiple compactions. type PreviousCompaction struct { ReadFiles []string ModifiedFiles []string } // StreamCallback is called for each chunk of text during streaming compaction. // Return a non-nil error to cancel the stream. type StreamCallback func(delta string) error // Compact summarises older messages using the LLM, returning the compaction // result and a new message slice (summary message + preserved recent // messages). // // The model parameter is the same fantasy.LanguageModel used for regular // generation — compaction creates a disposable fantasy agent with no tools to // produce the summary. // // customInstructions is optional text appended to the summary prompt (e.g. // "Focus on the API design decisions"). Pass "" to use the default prompt // only. // // prev carries file tracking from a previous compaction for cumulative // tracking. Pass nil if there is no prior compaction. // onChunk is an optional callback for streaming summary text. Pass nil for // non-streaming compaction. func Compact( ctx context.Context, model fantasy.LanguageModel, messages []fantasy.Message, opts CompactionOptions, customInstructions string, prev *PreviousCompaction, onChunk StreamCallback, ) (*CompactionResult, []fantasy.Message, error) { opts.defaults() if len(messages) < 2 { return nil, messages, nil } cutPoint := FindCutPoint(messages, opts.KeepRecentTokens) if cutPoint == 0 { // All messages fit within the keep budget. Force a cut that // keeps only the last non-tool message — always compact when // the user explicitly requests it. cutPoint = forceCutPoint(messages) if cutPoint == 0 { return nil, messages, nil } } oldMessages := messages[:cutPoint] recentMessages := messages[cutPoint:] originalTokens := EstimateMessageTokens(messages) // Extract file operations from old messages. ops := extractFileOps(oldMessages) // Accumulate from previous compaction if present. if prev != nil { ops.mergeSlices(prev.ReadFiles, prev.ModifiedFiles) } // Also scan recent messages for file ops (they'll be carried forward). recentOps := extractFileOps(recentMessages) ops.merge(recentOps) // Handle split turns: when the cut lands mid-turn, summarise the turn // prefix separately and merge with the history summary. var summaryText string var err error if IsSplitTurn(messages, cutPoint) { summaryText, err = compactSplitTurn(ctx, model, oldMessages, messages, cutPoint, opts, customInstructions, onChunk) } else { summaryText, err = compactNormal(ctx, model, oldMessages, opts, customInstructions, onChunk) } if err != nil { return nil, nil, err } if summaryText == "" { return nil, nil, fmt.Errorf("compaction produced an empty summary") } // Build the new message list: summary as a system message + preserved recent. summaryMessage := fantasy.Message{ Role: fantasy.MessageRoleSystem, Content: []fantasy.MessagePart{ fantasy.TextPart{ Text: fmt.Sprintf("[Conversation summary — earlier messages were compacted]\n\n%s", summaryText), }, }, } newMessages := make([]fantasy.Message, 0, 1+len(recentMessages)) newMessages = append(newMessages, summaryMessage) newMessages = append(newMessages, recentMessages...) compactedTokens := EstimateMessageTokens(newMessages) return &CompactionResult{ Summary: summaryText, OriginalTokens: originalTokens, CompactedTokens: compactedTokens, MessagesRemoved: len(oldMessages), CutPoint: cutPoint, ReadFiles: sortedKeys(ops.ReadFiles), ModifiedFiles: sortedKeys(ops.ModifiedFiles), }, newMessages, nil } // compactNormal generates a summary for a clean turn-boundary cut. // If onChunk is provided, text deltas are streamed to it. func compactNormal( ctx context.Context, model fantasy.LanguageModel, oldMessages []fantasy.Message, opts CompactionOptions, customInstructions string, onChunk StreamCallback, ) (string, error) { conversationText := serializeMessages(oldMessages) return generateSummary(ctx, model, conversationText, opts, customInstructions, onChunk) } // compactSplitTurn handles the case where the cut point lands mid-turn. // It generates two summaries and merges them: // 1. History summary: all complete turns before the split turn // 2. Turn prefix summary: the early part of the split turn (from the turn's // user message up to the cut point) // // The merged result preserves context from both the older history and the // beginning of the current long turn. // If onChunk is provided, both summaries and the separator are streamed. func compactSplitTurn( ctx context.Context, model fantasy.LanguageModel, oldMessages []fantasy.Message, allMessages []fantasy.Message, cutPoint int, opts CompactionOptions, customInstructions string, onChunk StreamCallback, ) (string, error) { // Find where the split turn starts. turnStart := findTurnStart(allMessages, cutPoint) // Messages before the turn are the "history" portion. historyMessages := oldMessages if turnStart > 0 && turnStart < len(oldMessages) { historyMessages = oldMessages[:turnStart] } // The turn prefix: from turnStart to cutPoint. turnPrefixMessages := allMessages[turnStart:cutPoint] var historySummary string var err error // Generate history summary if there are complete turns before the split. if len(historyMessages) >= 2 { historySummary, err = generateSummary(ctx, model, serializeMessages(historyMessages), opts, "", onChunk) if err != nil { return "", fmt.Errorf("split turn history summary failed: %w", err) } } // Stream the separator between history and turn prefix summaries. if onChunk != nil && historySummary != "" { if err := onChunk("\n\n---\n\n## Current Turn (in progress)\n\n"); err != nil { return "", fmt.Errorf("streaming separator failed: %w", err) } } // Generate turn prefix summary. turnPrefixText := serializeMessages(turnPrefixMessages) turnPrefixPrompt := "The messages above are the BEGINNING of a long turn that was split. " + "Summarize the work done so far in this turn, preserving tool call results, " + "file changes, and progress. Another LLM will continue this turn." if customInstructions != "" { turnPrefixPrompt += "\n\nAdditional instructions: " + customInstructions } turnPrefixSummary, err := generateSummary(ctx, model, turnPrefixText, opts, turnPrefixPrompt, onChunk) if err != nil { return "", fmt.Errorf("split turn prefix summary failed: %w", err) } // Merge the two summaries. if historySummary != "" && turnPrefixSummary != "" { return historySummary + "\n\n---\n\n## Current Turn (in progress)\n\n" + turnPrefixSummary, nil } if turnPrefixSummary != "" { return turnPrefixSummary, nil } return historySummary, nil } // generateSummary calls the LLM to produce a structured summary. // If onChunk is provided, the summary is streamed using Agent.Stream(). func generateSummary( ctx context.Context, model fantasy.LanguageModel, conversationText string, opts CompactionOptions, customInstructions string, onChunk StreamCallback, ) (string, error) { userPrompt := opts.SummaryPrompt if userPrompt == "" { userPrompt = defaultSummaryPrompt } if customInstructions != "" { userPrompt += "\n\nAdditional instructions: " + customInstructions } summaryAgent := fantasy.NewAgent(model, fantasy.WithSystemPrompt(defaultSystemPrompt), ) prompt := conversationText + "\n\n" + userPrompt // Use streaming if onChunk is provided. if onChunk != nil { var fullText strings.Builder _, err := summaryAgent.Stream(ctx, fantasy.AgentStreamCall{ Prompt: prompt, OnTextDelta: func(_, delta string) error { if delta != "" { fullText.WriteString(delta) return onChunk(delta) } return nil }, }) if err != nil { return "", fmt.Errorf("compaction summarisation (streaming) failed: %w", err) } return fullText.String(), nil } // Non-streaming path. result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{ Prompt: prompt, }) if err != nil { return "", fmt.Errorf("compaction summarisation failed: %w", err) } return result.Response.Content.Text(), nil }