kit/internal/compaction/compaction.go

// Package compaction provides context window management with token estimation,
// compaction triggers, and LLM-based conversation summarization.
//
// The algorithm preserves a token budget of recent
// messages (KeepRecentTokens, default 20 000) rather than a fixed message
// count. Auto-compaction fires when estimated context usage exceeds
// contextWindow − ReserveTokens.
//
// Features modelled after pi's compaction system:
//   - Tool result truncation (2000 char max) during serialisation
//   - Split turn handling: when a single turn exceeds the keep budget,
//     the turn prefix is summarised separately and merged
//   - Cumulative file tracking: read and modified files extracted from
//     tool calls and carried forward across compactions
package compaction

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"charm.land/fantasy"
)

// ---------------------------------------------------------------------------
// Token estimation
// ---------------------------------------------------------------------------

// estimateTokens provides a rough token count (~4 chars per token).
func estimateTokens(text string) int {
	return len(text) / 4
}

// EstimateMessageTokens estimates total tokens across a slice of fantasy
// messages by summing the estimated tokens for every text part.
func EstimateMessageTokens(messages []fantasy.Message) int {
	total := 0
	for _, msg := range messages {
		total += estimateSingleMessageTokens(msg)
	}
	return total
}

// estimateSingleMessageTokens returns the estimated token count for one
// message.
func estimateSingleMessageTokens(msg fantasy.Message) int {
	total := 0
	for _, part := range msg.Content {
		if tp, ok := part.(fantasy.TextPart); ok {
			total += estimateTokens(tp.Text)
		}
	}
	return total
}

// ---------------------------------------------------------------------------
// Auto-compact trigger
// ---------------------------------------------------------------------------

// ShouldCompact reports whether auto-compaction should fire.
// Formula: contextTokens > contextWindow − reserveTokens.
func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool {
	if contextWindow <= 0 || reserveTokens <= 0 {
		return false
	}
	estimated := EstimateMessageTokens(messages)
	return estimated > contextWindow-reserveTokens
}

// ---------------------------------------------------------------------------
// Options & defaults
// ---------------------------------------------------------------------------

// CompactionResult contains statistics from a compaction operation.
type CompactionResult struct {
	Summary         string   // LLM-generated summary of compacted messages
	OriginalTokens  int      // Estimated token count before compaction
	CompactedTokens int      // Estimated token count after compaction
	MessagesRemoved int      // Number of messages replaced by the summary
	CutPoint        int      // Index in the original messages where the cut was made
	ReadFiles       []string // Files read during the compacted conversation
	ModifiedFiles   []string // Files modified during the compacted conversation
}

// CompactionOptions configures compaction behaviour. Token-based defaults
// are applied for zero-value fields.
type CompactionOptions struct {
	ContextWindow    int    // Model's context window size (tokens)
	ReserveTokens    int    // Tokens to reserve for LLM response, default 16384
	KeepRecentTokens int    // Recent tokens to preserve (not summarised), default 20000
	SummaryPrompt    string // Custom summary prompt (empty = use default)
}

// defaults fills zero-value fields with sensible defaults.
func (o *CompactionOptions) defaults() {
	if o.ReserveTokens <= 0 {
		o.ReserveTokens = 16384
	}
	if o.KeepRecentTokens <= 0 {
		o.KeepRecentTokens = 20000
	}
}

// defaultSystemPrompt is the system prompt sent to the summarisation LLM.

const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.

Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`

// defaultSummaryPrompt is the user prompt appended after the serialised
// conversation.
const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work.

Use this EXACT format:

## Goal
[What is the user trying to accomplish? Can be multiple items if the session covers different tasks.]

## Constraints & Preferences
- [Any constraints, preferences, or requirements mentioned by user]
- [Or "(none)" if none were mentioned]

## Progress
### Done
- [x] [Completed tasks/changes]

### In Progress
- [ ] [Current work]

### Blocked
- [Issues preventing progress, if any]

## Key Decisions
- **[Decision]**: [Brief rationale]

## Next Steps
1. [Ordered list of what should happen next]

## Critical Context
- [Any data, examples, or references needed to continue]
- [Or "(none)" if not applicable]

<read-files>
[One file path per line for files that were read during the conversation]
</read-files>

<modified-files>
[One file path per line for files that were created, edited, or written during the conversation]
</modified-files>

Keep each section concise. Preserve exact file paths, function names, and error messages.`

// ---------------------------------------------------------------------------
// Tool result truncation
// ---------------------------------------------------------------------------

// maxToolResultChars is the maximum length of tool result text preserved
// during serialisation. Longer results are truncated with a marker.
const maxToolResultChars = 2000

// truncateToolResult truncates text to maxToolResultChars, appending a
// marker indicating how many characters were removed.
func truncateToolResult(text string) string {
	if len(text) <= maxToolResultChars {
		return text
	}
	truncated := len(text) - maxToolResultChars
	return text[:maxToolResultChars] + fmt.Sprintf("\n[...%d chars truncated]", truncated)
}

// ---------------------------------------------------------------------------
// Cut point (token-based)
// ---------------------------------------------------------------------------

// isValidCutPoint returns true if the message at index i is a valid place to
// split the conversation. Tool-role messages (tool results) must stay with
// their preceding assistant tool-call, so they are never valid cut points.
func isValidCutPoint(msg fantasy.Message) bool {
	return msg.Role != fantasy.MessageRoleTool
}

// findTurnStart returns the index of the user message that starts the turn
// containing messages[idx]. A "turn" starts with a user message and includes
// all subsequent assistant/tool messages until the next user message.
func findTurnStart(messages []fantasy.Message, idx int) int {
	for i := idx; i >= 0; i-- {
		if messages[i].Role == fantasy.MessageRoleUser {
			return i
		}
	}
	return 0
}

// FindCutPoint walks backward from the end of messages, accumulating tokens
// until the keepRecentTokens budget is filled. Returns the index that
// separates "old" messages (0..cutPoint-1, to be summarised) from "recent"
// messages (cutPoint..end, to be preserved).
//
// The cut point prefers turn boundaries (user messages). When a single turn
// exceeds the budget, the cut lands mid-turn (IsSplitTurn returns true).
//
// Returns 0 if there are fewer than 2 messages or all messages fit within
// the keep budget.
func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int {
	if len(messages) < 2 {
		return 0
	}
	if keepRecentTokens <= 0 {
		keepRecentTokens = 20000
	}

	accumulated := 0

	for i := len(messages) - 1; i >= 0; i-- {
		accumulated += estimateSingleMessageTokens(messages[i])
		if accumulated > keepRecentTokens {
			cut := i + 1

			// If the last message alone exceeds the budget, keep it
			// anyway and summarise everything before it.
			if cut >= len(messages) {
				cut = len(messages) - 1
			}

			// Land on a valid cut point — scan forward past tool-result
			// messages (they must stay with their preceding tool call).
			for cut < len(messages) && !isValidCutPoint(messages[cut]) {
				cut++
			}
			if cut >= len(messages) {
				return 0
			}

			// Need at least 2 messages before the cut to produce a
			// meaningful summary.
			if cut < 2 {
				return 0
			}
			return cut
		}
	}

	// All messages fit within the budget — nothing to compact.
	return 0
}

// IsSplitTurn returns true if the cut point lands in the middle of a turn
// (i.e. the message at cutPoint is not a user message, meaning we're
// splitting a single turn's assistant/tool messages).
func IsSplitTurn(messages []fantasy.Message, cutPoint int) bool {
	if cutPoint <= 0 || cutPoint >= len(messages) {
		return false
	}
	// If the cut point is at a user message, it's a clean turn boundary.
	if messages[cutPoint].Role == fantasy.MessageRoleUser {
		return false
	}
	// Otherwise we're cutting mid-turn — check if the turn started before
	// the cut point.
	turnStart := findTurnStart(messages, cutPoint)
	return turnStart < cutPoint
}

// forceCutPoint returns a cut point that keeps only the last non-tool
// message, summarising everything before it. Used when the budget-based
// FindCutPoint returns 0 but the caller wants to compact anyway (manual
// /compact). Returns 0 if no valid cut exists.
func forceCutPoint(messages []fantasy.Message) int {
	// Walk backward to find the last valid (non-tool) message boundary.
	for i := len(messages) - 1; i >= 2; i-- {
		if isValidCutPoint(messages[i]) {
			return i
		}
	}
	return 0
}

// ---------------------------------------------------------------------------
// File tracking
// ---------------------------------------------------------------------------

// fileOps contains cumulative file operation tracking.
type fileOps struct {
	ReadFiles     map[string]bool
	ModifiedFiles map[string]bool
}

func newFileOps() *fileOps {
	return &fileOps{
		ReadFiles:     make(map[string]bool),
		ModifiedFiles: make(map[string]bool),
	}
}

// extractFileOps scans messages for tool calls and extracts file paths.
// It recognises the built-in Kit tools: read, write, edit, bash, grep, find, ls.
func extractFileOps(messages []fantasy.Message) *fileOps {
	ops := newFileOps()
	for _, msg := range messages {
		for _, part := range msg.Content {
			tc, ok := part.(fantasy.ToolCallPart)
			if !ok {
				continue
			}

			// Parse the JSON input to extract path arguments.
			var args map[string]any
			if err := json.Unmarshal([]byte(tc.Input), &args); err != nil {
				continue
			}

			path, _ := args["path"].(string)
			if path == "" {
				continue
			}

			switch tc.ToolName {
			case "read", "grep", "find", "ls":
				ops.ReadFiles[path] = true
			case "write", "edit":
				ops.ModifiedFiles[path] = true
			}
		}
	}
	return ops
}

// merge combines another fileOps into this one (for cumulative tracking).
func (f *fileOps) merge(other *fileOps) {
	if other == nil {
		return
	}
	for k := range other.ReadFiles {
		f.ReadFiles[k] = true
	}
	for k := range other.ModifiedFiles {
		f.ModifiedFiles[k] = true
	}
}

// mergeSlices adds previously tracked file lists (from a prior compaction).
func (f *fileOps) mergeSlices(readFiles, modifiedFiles []string) {
	for _, p := range readFiles {
		f.ReadFiles[p] = true
	}
	for _, p := range modifiedFiles {
		f.ModifiedFiles[p] = true
	}
}

// sortedKeys returns the keys of a bool map sorted alphabetically.
func sortedKeys(m map[string]bool) []string {
	if len(m) == 0 {
		return nil
	}
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	// Simple sort — no need for sort package for small lists.
	for i := 0; i < len(keys); i++ {
		for j := i + 1; j < len(keys); j++ {
			if keys[j] < keys[i] {
				keys[i], keys[j] = keys[j], keys[i]
			}
		}
	}
	return keys
}

// ---------------------------------------------------------------------------
// Message serialisation
// ---------------------------------------------------------------------------

// roleLabel returns a human-readable label for a fantasy message role.
func roleLabel(role fantasy.MessageRole) string {
	switch role {
	case fantasy.MessageRoleUser:
		return "[User]"
	case fantasy.MessageRoleAssistant:
		return "[Assistant]"
	case fantasy.MessageRoleTool:
		return "[Tool result]"
	case fantasy.MessageRoleSystem:
		return "[System]"
	default:
		return "[" + string(role) + "]"
	}
}

// serializeMessages converts a slice of fantasy messages into a plain-text
// representation suitable for sending to the summarisation LLM. Tool result
// text is truncated to maxToolResultChars to keep the summarisation request
// within reasonable token budgets.
func serializeMessages(messages []fantasy.Message) string {
	var sb strings.Builder
	for _, msg := range messages {
		sb.WriteString(roleLabel(msg.Role))
		sb.WriteString(":\n")
		for _, part := range msg.Content {
			switch p := part.(type) {
			case fantasy.TextPart:
				if msg.Role == fantasy.MessageRoleTool {
					sb.WriteString(truncateToolResult(p.Text))
				} else {
					sb.WriteString(p.Text)
				}
			case fantasy.ToolCallPart:
				fmt.Fprintf(&sb, "[Tool call: %s(%s)]", p.ToolName, truncateToolResult(p.Input))
			case fantasy.ReasoningPart:
				fmt.Fprintf(&sb, "[Thinking]: %s", truncateToolResult(p.Text))
			}
		}
		sb.WriteString("\n\n")
	}
	return sb.String()
}

// ---------------------------------------------------------------------------
// Compact
// ---------------------------------------------------------------------------

// PreviousCompaction carries file tracking state from a prior compaction so
// that file operations accumulate across multiple compactions.
type PreviousCompaction struct {
	ReadFiles     []string
	ModifiedFiles []string
}

// StreamCallback is called for each chunk of text during streaming compaction.
// Return a non-nil error to cancel the stream.
type StreamCallback func(delta string) error

// Compact summarises older messages using the LLM, returning the compaction
// result and a new message slice (summary message + preserved recent
// messages).
//
// The model parameter is the same fantasy.LanguageModel used for regular
// generation — compaction creates a disposable fantasy agent with no tools to
// produce the summary.
//
// customInstructions is optional text appended to the summary prompt (e.g.
// "Focus on the API design decisions"). Pass "" to use the default prompt
// only.
//
// prev carries file tracking from a previous compaction for cumulative
// tracking. Pass nil if there is no prior compaction.
// onChunk is an optional callback for streaming summary text. Pass nil for
// non-streaming compaction.
func Compact(
	ctx context.Context,
	model fantasy.LanguageModel,
	messages []fantasy.Message,
	opts CompactionOptions,
	customInstructions string,
	prev *PreviousCompaction,
	onChunk StreamCallback,
) (*CompactionResult, []fantasy.Message, error) {
	opts.defaults()

	if len(messages) < 2 {
		return nil, messages, nil
	}

	cutPoint := FindCutPoint(messages, opts.KeepRecentTokens)
	if cutPoint == 0 {
		// All messages fit within the keep budget. Force a cut that
		// keeps only the last non-tool message — always compact when
		// the user explicitly requests it.
		cutPoint = forceCutPoint(messages)
		if cutPoint == 0 {
			return nil, messages, nil
		}
	}

	oldMessages := messages[:cutPoint]
	recentMessages := messages[cutPoint:]
	originalTokens := EstimateMessageTokens(messages)

	// Extract file operations from old messages.
	ops := extractFileOps(oldMessages)
	// Accumulate from previous compaction if present.
	if prev != nil {
		ops.mergeSlices(prev.ReadFiles, prev.ModifiedFiles)
	}
	// Also scan recent messages for file ops (they'll be carried forward).
	recentOps := extractFileOps(recentMessages)
	ops.merge(recentOps)

	// Handle split turns: when the cut lands mid-turn, summarise the turn
	// prefix separately and merge with the history summary.
	var summaryText string
	var err error

	if IsSplitTurn(messages, cutPoint) {
		summaryText, err = compactSplitTurn(ctx, model, oldMessages, messages, cutPoint, opts, customInstructions, onChunk)
	} else {
		summaryText, err = compactNormal(ctx, model, oldMessages, opts, customInstructions, onChunk)
	}
	if err != nil {
		return nil, nil, err
	}

	if summaryText == "" {
		return nil, nil, fmt.Errorf("compaction produced an empty summary")
	}

	// Build the new message list: summary as a system message + preserved recent.
	summaryMessage := fantasy.Message{
		Role: fantasy.MessageRoleSystem,
		Content: []fantasy.MessagePart{
			fantasy.TextPart{
				Text: fmt.Sprintf("[Conversation summary — earlier messages were compacted]\n\n%s", summaryText),
			},
		},
	}

	newMessages := make([]fantasy.Message, 0, 1+len(recentMessages))
	newMessages = append(newMessages, summaryMessage)
	newMessages = append(newMessages, recentMessages...)

	compactedTokens := EstimateMessageTokens(newMessages)

	return &CompactionResult{
		Summary:         summaryText,
		OriginalTokens:  originalTokens,
		CompactedTokens: compactedTokens,
		MessagesRemoved: len(oldMessages),
		CutPoint:        cutPoint,
		ReadFiles:       sortedKeys(ops.ReadFiles),
		ModifiedFiles:   sortedKeys(ops.ModifiedFiles),
	}, newMessages, nil
}

// compactNormal generates a summary for a clean turn-boundary cut.
// If onChunk is provided, text deltas are streamed to it.
func compactNormal(
	ctx context.Context,
	model fantasy.LanguageModel,
	oldMessages []fantasy.Message,
	opts CompactionOptions,
	customInstructions string,
	onChunk StreamCallback,
) (string, error) {
	conversationText := serializeMessages(oldMessages)
	return generateSummary(ctx, model, conversationText, opts, customInstructions, onChunk)
}

// compactSplitTurn handles the case where the cut point lands mid-turn.
// It generates two summaries and merges them:
//  1. History summary: all complete turns before the split turn
//  2. Turn prefix summary: the early part of the split turn (from the turn's
//     user message up to the cut point)
//
// The merged result preserves context from both the older history and the
// beginning of the current long turn.
// If onChunk is provided, both summaries and the separator are streamed.
func compactSplitTurn(
	ctx context.Context,
	model fantasy.LanguageModel,
	oldMessages []fantasy.Message,
	allMessages []fantasy.Message,
	cutPoint int,
	opts CompactionOptions,
	customInstructions string,
	onChunk StreamCallback,
) (string, error) {
	// Find where the split turn starts.
	turnStart := findTurnStart(allMessages, cutPoint)

	// Messages before the turn are the "history" portion.
	historyMessages := oldMessages
	if turnStart > 0 && turnStart < len(oldMessages) {
		historyMessages = oldMessages[:turnStart]
	}

	// The turn prefix: from turnStart to cutPoint.
	turnPrefixMessages := allMessages[turnStart:cutPoint]

	var historySummary string
	var err error

	// Generate history summary if there are complete turns before the split.
	if len(historyMessages) >= 2 {
		historySummary, err = generateSummary(ctx, model,
			serializeMessages(historyMessages), opts, "", onChunk)
		if err != nil {
			return "", fmt.Errorf("split turn history summary failed: %w", err)
		}
	}

	// Stream the separator between history and turn prefix summaries.
	if onChunk != nil && historySummary != "" {
		if err := onChunk("\n\n---\n\n## Current Turn (in progress)\n\n"); err != nil {
			return "", fmt.Errorf("streaming separator failed: %w", err)
		}
	}

	// Generate turn prefix summary.
	turnPrefixText := serializeMessages(turnPrefixMessages)
	turnPrefixPrompt := "The messages above are the BEGINNING of a long turn that was split. " +
		"Summarize the work done so far in this turn, preserving tool call results, " +
		"file changes, and progress. Another LLM will continue this turn."
	if customInstructions != "" {
		turnPrefixPrompt += "\n\nAdditional instructions: " + customInstructions
	}

	turnPrefixSummary, err := generateSummary(ctx, model, turnPrefixText, opts, turnPrefixPrompt, onChunk)
	if err != nil {
		return "", fmt.Errorf("split turn prefix summary failed: %w", err)
	}

	// Merge the two summaries.
	if historySummary != "" && turnPrefixSummary != "" {
		return historySummary + "\n\n---\n\n## Current Turn (in progress)\n\n" + turnPrefixSummary, nil
	}
	if turnPrefixSummary != "" {
		return turnPrefixSummary, nil
	}
	return historySummary, nil
}

// generateSummary calls the LLM to produce a structured summary.
// If onChunk is provided, the summary is streamed using Agent.Stream().
func generateSummary(
	ctx context.Context,
	model fantasy.LanguageModel,
	conversationText string,
	opts CompactionOptions,
	customInstructions string,
	onChunk StreamCallback,
) (string, error) {
	userPrompt := opts.SummaryPrompt
	if userPrompt == "" {
		userPrompt = defaultSummaryPrompt
	}
	if customInstructions != "" {
		userPrompt += "\n\nAdditional instructions: " + customInstructions
	}

	summaryAgent := fantasy.NewAgent(model,
		fantasy.WithSystemPrompt(defaultSystemPrompt),
	)

	prompt := conversationText + "\n\n" + userPrompt

	// Use streaming if onChunk is provided.
	if onChunk != nil {
		var fullText strings.Builder
		_, err := summaryAgent.Stream(ctx, fantasy.AgentStreamCall{
			Prompt: prompt,
			OnTextDelta: func(_, delta string) error {
				if delta != "" {
					fullText.WriteString(delta)
					return onChunk(delta)
				}
				return nil
			},
		})
		if err != nil {
			return "", fmt.Errorf("compaction summarisation (streaming) failed: %w", err)
		}
		return fullText.String(), nil
	}

	// Non-streaming path.
	result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{
		Prompt: prompt,
	})
	if err != nil {
		return "", fmt.Errorf("compaction summarisation failed: %w", err)
	}

	return result.Response.Content.Text(), nil
}