add /compact command with Pi-style token-based compaction

Rework compaction to match Pi's method:

- Token-based cut point (KeepRecentTokens=20k) instead of fixed message count
- Auto-trigger: contextTokens > contextWindow - reserveTokens (16k default)
- Pi's structured summary prompt (Goal/Progress/Decisions/Next Steps format)
- /compact [instructions] supports custom focus text
- Force compaction on manual request (only gate: >= 2 messages)
- Summary displayed in styled block with sky/cyan border and token stats
- Spinner properly animated during compaction
2026-06-14 03:30:26 +00:00 · 2026-02-27 17:05:25 +03:00
parent 3804daa6fa
commit 215a3186ff
9 changed files with 597 additions and 131 deletions
@@ -169,6 +169,65 @@ func (a *App) GetTreeSession() *session.TreeManager {
 	return a.opts.TreeSession
 }

+// CompactConversation summarises older messages to free context space. It
+// returns an error synchronously if compaction cannot start (agent busy or
+// app closed). The actual compaction runs in a background goroutine and
+// delivers CompactCompleteEvent or CompactErrorEvent through the registered
+// tea.Program. customInstructions is optional text appended to the summary
+// prompt (e.g. "Focus on the API design decisions").
+//
+// Satisfies ui.AppController.
+func (a *App) CompactConversation(customInstructions string) error {
+	a.mu.Lock()
+	if a.closed {
+		a.mu.Unlock()
+		return fmt.Errorf("app is closed")
+	}
+	if a.busy {
+		a.mu.Unlock()
+		return fmt.Errorf("cannot compact while the agent is working")
+	}
+	if a.opts.Kit == nil {
+		a.mu.Unlock()
+		return fmt.Errorf("SDK instance not available")
+	}
+	a.busy = true
+	a.wg.Add(1)
+	a.mu.Unlock()
+
+	go func() {
+		defer a.wg.Done()
+		defer func() {
+			a.mu.Lock()
+			a.busy = false
+			a.mu.Unlock()
+		}()
+
+		result, err := a.opts.Kit.Compact(a.rootCtx, nil, customInstructions)
+		if err != nil {
+			a.sendEvent(CompactErrorEvent{Err: err})
+			return
+		}
+		if result == nil {
+			a.sendEvent(CompactErrorEvent{Err: fmt.Errorf("nothing to compact")})
+			return
+		}
+
+		// Sync in-memory store with the compacted session.
+		if a.opts.TreeSession != nil {
+			a.store.Replace(a.opts.TreeSession.GetFantasyMessages())
+		}
+
+		a.sendEvent(CompactCompleteEvent{
+			Summary:         result.Summary,
+			OriginalTokens:  result.OriginalTokens,
+			CompactedTokens: result.CompactedTokens,
+			MessagesRemoved: result.MessagesRemoved,
+		})
+	}()
+	return nil
+}
+
 // --------------------------------------------------------------------------
 // Non-interactive execution
 // --------------------------------------------------------------------------
@@ -94,6 +94,25 @@ type MessageCreatedEvent struct {
 	Message fantasy.Message
 }

+// CompactCompleteEvent is sent when a /compact operation finishes successfully.
+// It carries the summary text and before/after statistics. The token figures
+// are estimates rather than exact tokenizer counts.
+type CompactCompleteEvent struct {
+	// Summary is the LLM-generated structured summary of the compacted messages.
+	Summary string
+	// OriginalTokens is the estimated token count before compaction.
+	OriginalTokens int
+	// CompactedTokens is the estimated token count after compaction.
+	CompactedTokens int
+	// MessagesRemoved is the number of messages that were summarised away.
+	MessagesRemoved int
+}
+
+// CompactErrorEvent is sent when a /compact operation fails. It is also used
+// when compaction ran without error but produced no result, in which case Err
+// reads "nothing to compact".
+type CompactErrorEvent struct {
+	// Err is the error that caused compaction to fail.
+	Err error
+}
+
 // ExtensionPrintEvent is sent when an extension calls ctx.Print, ctx.PrintInfo,
 // ctx.PrintError, or ctx.PrintBlock. The TUI renders it via the appropriate
 // renderer and tea.Println (scrollback); the CLI handler uses
@@ -1,5 +1,10 @@
 // Package compaction provides context window management with token estimation,
 // compaction triggers, and LLM-based conversation summarization.
+//
+// The algorithm mirrors Pi's approach: preserve a token budget of recent
+// messages (KeepRecentTokens, default 20 000) rather than a fixed message
+// count. Auto-compaction fires when estimated context usage exceeds
+// contextWindow − ReserveTokens.
 package compaction

 import (
@@ -10,36 +15,55 @@ import (
 	"charm.land/fantasy"
 )

+// ---------------------------------------------------------------------------
+// Token estimation
+// ---------------------------------------------------------------------------
+
 // EstimateTokens provides a rough token count (~4 chars per token).
 func EstimateTokens(text string) int {
 	return len(text) / 4
 }

-// EstimateMessageTokens estimates total tokens across a slice of fantasy messages
-// by summing the estimated tokens for every text part.
+// EstimateMessageTokens estimates total tokens across a slice of fantasy
+// messages by summing the estimated tokens for every text part.
 func EstimateMessageTokens(messages []fantasy.Message) int {
 	total := 0
 	for _, msg := range messages {
-		for _, part := range msg.Content {
-			if tp, ok := part.(fantasy.TextPart); ok {
-				total += EstimateTokens(tp.Text)
-			}
+		total += estimateSingleMessageTokens(msg)
+	}
+	return total
+}
+
+// estimateSingleMessageTokens returns the estimated token count for one
+// message.
+func estimateSingleMessageTokens(msg fantasy.Message) int {
+	total := 0
+	for _, part := range msg.Content {
+		if tp, ok := part.(fantasy.TextPart); ok {
+			total += EstimateTokens(tp.Text)
 		}
 	}
 	return total
 }

-// ShouldCompact reports whether the conversation exceeds the threshold
-// percentage of the context limit. thresholdPct should be in the range 0.0–1.0
-// (e.g. 0.8 means 80%).
-func ShouldCompact(messages []fantasy.Message, contextLimit int, thresholdPct float64) bool {
-	if contextLimit <= 0 || thresholdPct <= 0 {
+// ---------------------------------------------------------------------------
+// Auto-compact trigger
+// ---------------------------------------------------------------------------
+
+// ShouldCompact reports whether auto-compaction should fire. It uses Pi's
+// formula: contextTokens > contextWindow − reserveTokens.
+func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool {
+	if contextWindow <= 0 || reserveTokens <= 0 {
 		return false
 	}
 	estimated := EstimateMessageTokens(messages)
-	return float64(estimated) >= float64(contextLimit)*thresholdPct
+	return estimated > contextWindow-reserveTokens
 }

+// ---------------------------------------------------------------------------
+// Options & defaults
+// ---------------------------------------------------------------------------
+
 // CompactionResult contains statistics from a compaction operation.
 type CompactionResult struct {
 	Summary         string // LLM-generated summary of compacted messages
@@ -48,74 +72,170 @@ type CompactionResult struct {
 	MessagesRemoved int    // Number of messages replaced by the summary
 }

-// CompactionOptions configures compaction behaviour.
+// CompactionOptions configures compaction behaviour. Pi-style token-based
+// defaults are applied for zero-value fields.
 type CompactionOptions struct {
-	ContextLimit   int     // Model's context window size (tokens)
-	ThresholdPct   float64 // Trigger threshold (0.0–1.0), default 0.8
-	PreserveRecent int     // Number of recent messages to keep, default 10
-	SummaryPrompt  string  // Custom summary prompt (empty = use default)
+	ContextWindow    int    // Model's context window size (tokens)
+	ReserveTokens    int    // Tokens to reserve for LLM response, default 16384
+	KeepRecentTokens int    // Recent tokens to preserve (not summarised), default 20000
+	SummaryPrompt    string // Custom summary prompt (empty = use default)
 }

-// defaults fills zero-value fields with sensible defaults.
+// defaults fills zero-value fields with sensible Pi-style defaults.
 func (o *CompactionOptions) defaults() {
-	if o.ThresholdPct <= 0 {
-		o.ThresholdPct = 0.8
+	if o.ReserveTokens <= 0 {
+		o.ReserveTokens = 16384
 	}
-	if o.PreserveRecent <= 0 {
-		o.PreserveRecent = 10
+	if o.KeepRecentTokens <= 0 {
+		o.KeepRecentTokens = 20000
 	}
 }

-// defaultSummaryPrompt is the system prompt used to summarise older messages.
-const defaultSummaryPrompt = `You are a conversation summarizer. Summarize the following conversation messages into a concise summary that preserves:
-1. Key decisions and conclusions reached
-2. Important context and facts established
-3. Current task state and progress
-4. Any pending actions or open questions
+// defaultSystemPrompt is the system prompt sent to the summarisation LLM.
+// Matches Pi's compaction system prompt.
+const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.

-Be concise but thorough. Output only the summary text, no preamble.`
+Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`

-// FindCutPoint determines the index at which to cut messages for compaction.
-// Messages before the cut point will be summarised; messages from the cut
-// point onward are preserved. Returns 0 if no compaction is needed.
-func FindCutPoint(messages []fantasy.Message, preserveRecent int) int {
-	if preserveRecent <= 0 {
-		preserveRecent = 10
-	}
-	if len(messages) <= preserveRecent {
-		return 0 // not enough messages to compact
-	}
-	return len(messages) - preserveRecent
+// defaultSummaryPrompt is the user prompt appended after the serialised
+// conversation. Matches Pi's initial-compaction format.
+const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work.
+
+Use this EXACT format:
+
+## Goal
+[What is the user trying to accomplish? Can be multiple items if the session covers different tasks.]
+
+## Constraints & Preferences
+- [Any constraints, preferences, or requirements mentioned by user]
+- [Or "(none)" if none were mentioned]
+
+## Progress
+### Done
+- [x] [Completed tasks/changes]
+
+### In Progress
+- [ ] [Current work]
+
+### Blocked
+- [Issues preventing progress, if any]
+
+## Key Decisions
+- **[Decision]**: [Brief rationale]
+
+## Next Steps
+1. [Ordered list of what should happen next]
+
+## Critical Context
+- [Any data, examples, or references needed to continue]
+- [Or "(none)" if not applicable]
+
+Keep each section concise. Preserve exact file paths, function names, and error messages.`
+
+// ---------------------------------------------------------------------------
+// Cut point (token-based, Pi-style)
+// ---------------------------------------------------------------------------
+
+// isValidCutPoint reports whether msg may be the first message of the
+// preserved ("recent") half when the conversation is split. Tool-role
+// messages (tool results) must stay with their preceding assistant tool-call,
+// so they are never valid cut points; every other role is accepted.
+func isValidCutPoint(msg fantasy.Message) bool {
+	return msg.Role != fantasy.MessageRoleTool
 }

-// Compact summarises older messages using the LLM, returning the compaction
-// result and a new message slice (summary message + preserved recent messages).
+// FindCutPoint walks backward from the end of messages, accumulating tokens
+// until the keepRecentTokens budget is filled. Returns the index that
+// separates "old" messages (0..cutPoint-1, to be summarised) from "recent"
+// messages (cutPoint..end, to be preserved).
 //
-// The model parameter is the same fantasy.LanguageModel used for regular
-// generation — compaction creates a disposable fantasy agent with no tools to
-// produce the summary.
-func Compact(
-	ctx context.Context,
-	model fantasy.LanguageModel,
-	messages []fantasy.Message,
-	opts CompactionOptions,
-) (*CompactionResult, []fantasy.Message, error) {
-	opts.defaults()
-
-	cutPoint := FindCutPoint(messages, opts.PreserveRecent)
-	if cutPoint == 0 {
-		return nil, messages, nil // nothing to compact
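+// Returns 0 if there are fewer than 2 messages, if all messages fit within
+// the keep budget, if fewer than 2 messages would precede the cut, or if
+// only tool-result messages remain from the raw cut to the end.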
+func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int {
+	if len(messages) < 2 {
+		return 0
+	}
+	if keepRecentTokens <= 0 {
+		keepRecentTokens = 20000
 	}

-	oldMessages := messages[:cutPoint]
-	recentMessages := messages[cutPoint:]
-	originalTokens := EstimateMessageTokens(messages)
+	accumulated := 0

-	// Build a textual representation of the messages to summarise.
+	for i := len(messages) - 1; i >= 0; i-- {
+		accumulated += estimateSingleMessageTokens(messages[i])
+		if accumulated > keepRecentTokens {
+			cut := i + 1
+
+			// If the last message alone exceeds the budget, keep it
+			// anyway and summarise everything before it.
+			if cut >= len(messages) {
+				cut = len(messages) - 1
+			}
+
+			// Land on a valid cut point — scan forward past tool-result
+			// messages (they must stay with their preceding tool call).
+			for cut < len(messages) && !isValidCutPoint(messages[cut]) {
+				cut++
+			}
+			if cut >= len(messages) {
+				return 0
+			}
+
+			// Need at least 2 messages before the cut to produce a
+			// meaningful summary.
+			if cut < 2 {
+				return 0
+			}
+			return cut
+		}
+	}
+
+	// All messages fit within the budget — nothing to compact.
+	return 0
+}
+
+// forceCutPoint picks a cut that preserves as little as possible: the index
+// of the last non-tool message, so that everything before it is summarised.
+// It is the fallback for a manual /compact when the budget-based
+// FindCutPoint reports 0. The cut is only accepted at index 2 or later, so at
+// least two messages precede it; when no such index exists the result is 0.
+func forceCutPoint(messages []fantasy.Message) int {
+	// Step back from the end over trailing tool-result messages.
+	idx := len(messages) - 1
+	for idx >= 2 && !isValidCutPoint(messages[idx]) {
+		idx--
+	}
+	if idx < 2 {
+		return 0
+	}
+	return idx
+}
+
+// ---------------------------------------------------------------------------
+// Message serialisation (Pi-style)
+// ---------------------------------------------------------------------------
+
+// roleLabel maps a fantasy message role to the bracketed, human-readable tag
+// used when serialising a conversation (Pi's format). Roles without a
+// dedicated tag fall back to the raw role string in brackets.
+func roleLabel(role fantasy.MessageRole) string {
+	tag := string(role)
+	switch role {
+	case fantasy.MessageRoleUser:
+		tag = "User"
+	case fantasy.MessageRoleAssistant:
+		tag = "Assistant"
+	case fantasy.MessageRoleTool:
+		tag = "Tool result"
+	case fantasy.MessageRoleSystem:
+		tag = "System"
+	}
+	return "[" + tag + "]"
+}
+
+// serializeMessages converts a slice of fantasy messages into a plain-text
+// representation suitable for sending to the summarisation LLM. The format
+// mirrors Pi's compaction serialisation.
+func serializeMessages(messages []fantasy.Message) string {
 	var sb strings.Builder
-	for _, msg := range oldMessages {
-		sb.WriteString(string(msg.Role))
-		sb.WriteString(": ")
+	for _, msg := range messages {
+		sb.WriteString(roleLabel(msg.Role))
+		sb.WriteString(":\n")
 		for _, part := range msg.Content {
 			if tp, ok := part.(fantasy.TextPart); ok {
 				sb.WriteString(tp.Text)
@@ -123,20 +243,70 @@ func Compact(
 		}
 		sb.WriteString("\n\n")
 	}
-	conversationText := sb.String()
+	return sb.String()
+}

-	// Use the provided (or default) summary prompt.
-	summaryPrompt := opts.SummaryPrompt
-	if summaryPrompt == "" {
-		summaryPrompt = defaultSummaryPrompt
+// ---------------------------------------------------------------------------
+// Compact
+// ---------------------------------------------------------------------------
+
+// Compact summarises older messages using the LLM, returning the compaction
+// result and a new message slice (summary message + preserved recent
+// messages).
+//
+// The model parameter is the same fantasy.LanguageModel used for regular
+// generation — compaction creates a disposable fantasy agent with no tools to
+// produce the summary.
+//
+// customInstructions is optional text appended to the summary prompt (e.g.
+// "Focus on the API design decisions"). Pass "" to use the default prompt
+// only.
+func Compact(
+	ctx context.Context,
+	model fantasy.LanguageModel,
+	messages []fantasy.Message,
+	opts CompactionOptions,
+	customInstructions string,
+) (*CompactionResult, []fantasy.Message, error) {
+	opts.defaults()
+
+	if len(messages) < 2 {
+		return nil, messages, nil
+	}
+
+	cutPoint := FindCutPoint(messages, opts.KeepRecentTokens)
+	if cutPoint == 0 {
+		// All messages fit within the keep budget. Force a cut that
+		// keeps only the last non-tool message — matching Pi, which
+		// always compacts when the user explicitly requests it.
+		cutPoint = forceCutPoint(messages)
+		if cutPoint == 0 {
+			return nil, messages, nil
+		}
+	}
+
+	oldMessages := messages[:cutPoint]
+	recentMessages := messages[cutPoint:]
+	originalTokens := EstimateMessageTokens(messages)
+
+	// Serialise old messages to text, matching Pi's format.
+	conversationText := serializeMessages(oldMessages)
+
+	// Build the user-facing prompt: conversation text + summary instructions.
+	userPrompt := opts.SummaryPrompt
+	if userPrompt == "" {
+		userPrompt = defaultSummaryPrompt
+	}
+	if customInstructions != "" {
+		userPrompt += "\n\nAdditional instructions: " + customInstructions
 	}

 	// Create a lightweight agent (no tools) just for summarisation.
 	summaryAgent := fantasy.NewAgent(model,
-		fantasy.WithSystemPrompt(summaryPrompt),
+		fantasy.WithSystemPrompt(defaultSystemPrompt),
 	)
 	result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{
-		Prompt: conversationText,
+		Prompt: conversationText + "\n\n" + userPrompt,
 	})
 	if err != nil {
 		return nil, nil, fmt.Errorf("compaction summarisation failed: %w", err)
@@ -2,6 +2,7 @@ package compaction

 import (
 	"context"
+	"strings"
 	"testing"

 	"charm.land/fantasy"
@@ -14,6 +15,16 @@ func makeTextMessage(role fantasy.MessageRole, text string) fantasy.Message {
 	}
 }

+// makeTextMessageN builds a message of the given role whose text is the
+// letter "a" repeated n times, i.e. roughly n/4 estimated tokens.
+func makeTextMessageN(role fantasy.MessageRole, n int) fantasy.Message {
+	filler := strings.Repeat("a", n)
+	return makeTextMessage(role, filler)
+}
+
+// ---------------------------------------------------------------------------
+// Token estimation
+// ---------------------------------------------------------------------------
+
 func TestEstimateTokens(t *testing.T) {
 	tests := []struct {
 		text string
@@ -51,29 +62,30 @@ func TestEstimateMessageTokens_Empty(t *testing.T) {
 	}
 }

+// ---------------------------------------------------------------------------
+// ShouldCompact (Pi-style: contextTokens > contextWindow - reserveTokens)
+// ---------------------------------------------------------------------------
+
 func TestShouldCompact(t *testing.T) {
 	// Create messages that total ~100 tokens (400 chars).
-	longText := make([]byte, 400)
-	for i := range longText {
-		longText[i] = 'a'
-	}
-	msgs := []fantasy.Message{makeTextMessage(fantasy.MessageRoleUser, string(longText))}
+	msgs := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)}

 	tests := []struct {
-		name         string
-		contextLimit int
-		threshold    float64
-		want         bool
+		name          string
+		contextWindow int
+		reserveTokens int
+		want          bool
 	}{
-		{"above threshold", 120, 0.8, true},      // 100 >= 120*0.8=96
-		{"below threshold", 200, 0.8, false},     // 100 < 200*0.8=160
-		{"zero limit", 0, 0.8, false},            // no limit
-		{"zero threshold", 200, 0.0, false},      // no threshold
-		{"exactly at threshold", 125, 0.8, true}, // 100 >= 125*0.8=100
+		{"above threshold", 110, 16, true},       // 100 > 110-16=94 → true
+		{"below threshold", 200, 16, false},      // 100 > 200-16=184 → false
+		{"zero window", 0, 16, false},            // no window
+		{"zero reserve", 200, 0, false},          // no reserve
+		{"exactly at threshold", 116, 16, false}, // 100 > 116-16=100 → false (not >)
+		{"one over", 115, 16, true},              // 100 > 115-16=99 → true
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := ShouldCompact(msgs, tt.contextLimit, tt.threshold)
+			got := ShouldCompact(msgs, tt.contextWindow, tt.reserveTokens)
 			if got != tt.want {
 				t.Errorf("ShouldCompact() = %v, want %v", got, tt.want)
 			}
@@ -81,26 +93,41 @@ func TestShouldCompact(t *testing.T) {
 	}
 }

-func TestFindCutPoint(t *testing.T) {
-	msgs := make([]fantasy.Message, 20)
+// ---------------------------------------------------------------------------
+// FindCutPoint (token-based, Pi-style)
+// ---------------------------------------------------------------------------
+
+func TestFindCutPoint_TokenBased(t *testing.T) {
+	// Each message is 400 chars = ~100 tokens.
+	msgs := make([]fantasy.Message, 10)
 	for i := range msgs {
-		msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "msg")
+		if i%2 == 0 {
+			msgs[i] = makeTextMessageN(fantasy.MessageRoleUser, 400)
+		} else {
+			msgs[i] = makeTextMessageN(fantasy.MessageRoleAssistant, 400)
+		}
 	}

 	tests := []struct {
-		name           string
-		preserveRecent int
-		want           int
+		name             string
+		keepRecentTokens int
+		want             int // expected cut point
 	}{
-		{"preserve 10", 10, 10},
-		{"preserve 5", 5, 15},
-		{"preserve all", 20, 0},
-		{"preserve more than total", 25, 0},
-		{"preserve 0 uses default 10", 0, 10},
+		// keepRecentTokens=250 → walk back: msg[9]=100, msg[8]=200 ≤ 250,
+		// msg[7]=300 > 250 → cut = 8.
+		{"keep 250 tokens", 250, 8},
+		// keepRecentTokens=500 → walk back 5 msgs = 500 ≤ 500,
+		// 6th msg = 600 > 500 → cut = 5.
+		{"keep 500 tokens", 500, 5},
+		// keepRecentTokens=1000 → all 10 msgs = 1000, not exceeded → cut = 0.
+		{"keep all", 1000, 0},
+		// keepRecentTokens=50 → msg[9] alone = 100 > 50 → cut = 10,
+		// exceeds len → clamped to 9. 9 ≥ 2 → valid.
+		{"keep very few", 50, 9},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			got := FindCutPoint(msgs, tt.preserveRecent)
+			got := FindCutPoint(msgs, tt.keepRecentTokens)
 			if got != tt.want {
 				t.Errorf("FindCutPoint() = %d, want %d", got, tt.want)
 			}
@@ -108,38 +135,115 @@ func TestFindCutPoint(t *testing.T) {
 	}
 }

+// TestFindCutPoint_TooFewMessages checks that a single-message conversation
+// never yields a cut point, even when that message exceeds the keep budget.
+func TestFindCutPoint_TooFewMessages(t *testing.T) {
+	single := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)}
+	if got := FindCutPoint(single, 50); got != 0 {
+		t.Errorf("FindCutPoint(1 msg) = %d, want 0", got)
+	}
+}
+
+// TestFindCutPoint_SkipsToolResults covers how FindCutPoint treats tool-result
+// messages around the cut. Every message is 400 chars = 100 estimated tokens.
+func TestFindCutPoint_SkipsToolResults(t *testing.T) {
+	// Raw cut already lands on a non-tool message.
+	// [user, assistant, tool, user, assistant], keepRecentTokens=150:
+	//   msg[4] (assistant) → 100 ≤ 150
+	//   msg[3] (user)      → 200 > 150 → raw cut = 4 (assistant) → valid.
+	msgs := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+	}
+	if got := FindCutPoint(msgs, 150); got != 4 {
+		t.Errorf("FindCutPoint() = %d, want 4", got)
+	}
+
+	// [user, assistant, tool, tool, user]
+	msgs2 := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+	}
+
+	// keepRecentTokens=150:
+	//   msg[4] (user) → 100 ≤ 150
+	//   msg[3] (tool) → 200 > 150 → raw cut = 4 (user) → valid.
+	if got := FindCutPoint(msgs2, 150); got != 4 {
+		t.Errorf("FindCutPoint(tool results) = %d, want 4", got)
+	}
+
+	// keepRecentTokens=50: the last message alone (100) exceeds the budget,
+	// so the raw cut is 5 = len(msgs2). It is clamped to 4 so the last
+	// message is kept anyway; msg[4] is a user message → valid.
+	if got := FindCutPoint(msgs2, 50); got != 4 {
+		t.Errorf("FindCutPoint(oversized last message) = %d, want 4", got)
+	}
+
+	// Raw cut lands ON a tool message and must scan forward.
+	// [user(0), assistant(1), tool(2), tool(3), user(4), assistant(5)]
+	// keepRecentTokens=350:
+	//   msg[5] → 100, msg[4] → 200, msg[3] → 300 (all ≤ 350)
+	//   msg[2] → 400 > 350 → raw cut = 3 (tool) → scan forward → 4 (user).
+	msgs3 := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+	}
+	if got := FindCutPoint(msgs3, 350); got != 4 {
+		t.Errorf("FindCutPoint(scan forward) = %d, want 4", got)
+	}
+
+	// Scanning forward runs off the end: only tool results follow the raw cut.
+	// [user(0), assistant(1), tool(2), tool(3)], keepRecentTokens=150:
+	//   msg[3] → 100 ≤ 150
+	//   msg[2] → 200 > 150 → raw cut = 3 (tool) → scan forward → 4 = len → 0.
+	msgs4 := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+		makeTextMessageN(fantasy.MessageRoleTool, 400),
+	}
+	if got := FindCutPoint(msgs4, 150); got != 0 {
+		t.Errorf("FindCutPoint(trailing tool results) = %d, want 0", got)
+	}
+}
+
+// ---------------------------------------------------------------------------
+// CompactionOptions defaults
+// ---------------------------------------------------------------------------
+
 func TestCompactionOptions_Defaults(t *testing.T) {
 	opts := CompactionOptions{}
 	opts.defaults()

-	if opts.ThresholdPct != 0.8 {
-		t.Errorf("ThresholdPct = %f, want 0.8", opts.ThresholdPct)
+	if opts.ReserveTokens != 16384 {
+		t.Errorf("ReserveTokens = %d, want 16384", opts.ReserveTokens)
 	}
-	if opts.PreserveRecent != 10 {
-		t.Errorf("PreserveRecent = %d, want 10", opts.PreserveRecent)
+	if opts.KeepRecentTokens != 20000 {
+		t.Errorf("KeepRecentTokens = %d, want 20000", opts.KeepRecentTokens)
 	}
 }

 func TestCompactionOptions_DefaultsPreservesExisting(t *testing.T) {
-	opts := CompactionOptions{ThresholdPct: 0.9, PreserveRecent: 5}
+	opts := CompactionOptions{ReserveTokens: 8192, KeepRecentTokens: 10000}
 	opts.defaults()

-	if opts.ThresholdPct != 0.9 {
-		t.Errorf("ThresholdPct = %f, want 0.9", opts.ThresholdPct)
+	if opts.ReserveTokens != 8192 {
+		t.Errorf("ReserveTokens = %d, want 8192", opts.ReserveTokens)
 	}
-	if opts.PreserveRecent != 5 {
-		t.Errorf("PreserveRecent = %d, want 5", opts.PreserveRecent)
+	if opts.KeepRecentTokens != 10000 {
+		t.Errorf("KeepRecentTokens = %d, want 10000", opts.KeepRecentTokens)
 	}
 }

+// ---------------------------------------------------------------------------
+// Compact (integration — too few messages)
+// ---------------------------------------------------------------------------
+
 func TestCompact_TooFewMessages(t *testing.T) {
-	msgs := make([]fantasy.Message, 5)
-	for i := range msgs {
-		msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "short")
+	msgs := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
 	}

-	// Default preserveRecent = 10, so 5 messages is too few.
-	result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{})
+	result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "")
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
@@ -150,3 +254,22 @@ func TestCompact_TooFewMessages(t *testing.T) {
 		t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs))
 	}
 }
+
+// TestCompact_WithinBudget checks that Compact is a no-op for a two-message
+// conversation that fits within the keep budget.
+func TestCompact_WithinBudget(t *testing.T) {
+	// Two messages of 100 estimated tokens each are far below the default
+	// KeepRecentTokens (20000), so FindCutPoint yields 0. The forced cut
+	// needs at least two messages before it, which a two-message
+	// conversation cannot provide, so Compact must return early — before
+	// it would use the nil model passed here.
+	msgs := []fantasy.Message{
+		makeTextMessageN(fantasy.MessageRoleUser, 400),
+		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
+	}
+
+	result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "")
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result != nil {
+		t.Error("expected nil result when no cut point exists for a two-message conversation")
+	}
+	if len(newMsgs) != len(msgs) {
+		t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs))
+	}
+}
@@ -59,6 +59,12 @@ var SlashCommands = []SlashCommand{
 		Category:    "System",
 		Aliases:     []string{"/cq"},
 	},
+	{
+		Name:        "/compact",
+		Description: "Summarise older messages to free context space",
+		Category:    "System",
+		Aliases:     []string{"/co"},
+	},
 	{
 		Name:        "/quit",
 		Description: "Exit the application",
@@ -50,6 +50,12 @@ type AppController interface {
 	ClearQueue()
 	// ClearMessages clears the conversation history.
 	ClearMessages()
+	// CompactConversation summarises older messages to free context space.
+	// Runs asynchronously; results are delivered via CompactCompleteEvent or
+	// CompactErrorEvent sent through the registered tea.Program. Returns an
+	// error synchronously if compaction cannot be started (e.g. agent is busy).
+	// customInstructions is optional text appended to the summary prompt.
+	CompactConversation(customInstructions string) error
 	// GetTreeSession returns the tree session manager, or nil if tree sessions
 	// are not enabled. Used by slash commands like /tree, /fork, /session.
 	GetTreeSession() *session.TreeManager
@@ -497,6 +503,17 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			return m, tea.Batch(cmds...)
 		}

+		// /compact supports optional args: "/compact Focus on API decisions".
+		// GetCommandByName won't match the full text, so check the prefix.
+		if name, args, ok := strings.Cut(msg.Text, " "); ok {
+			if sc := GetCommandByName(name); sc != nil && sc.Name == "/compact" {
+				if cmd := m.handleCompactCommand(strings.TrimSpace(args)); cmd != nil {
+					cmds = append(cmds, cmd)
+				}
+				return m, tea.Batch(cmds...)
+			}
+		}
+
 		// Check extension-registered slash commands. These support arguments
 		// (e.g. "/sub list files"), so we split on the first space.
 		if cmd := m.handleExtensionCommand(msg.Text); cmd != nil {
@@ -636,6 +653,20 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		m.state = stateInput
 		m.canceling = false

+	case app.CompactCompleteEvent:
+		if m.stream != nil {
+			m.stream.Reset()
+		}
+		m.state = stateInput
+		cmds = append(cmds, m.printCompactResult(msg))
+
+	case app.CompactErrorEvent:
+		if m.stream != nil {
+			m.stream.Reset()
+		}
+		m.state = stateInput
+		cmds = append(cmds, m.printSystemMessage(fmt.Sprintf("Compaction failed: %v", msg.Err)))
+
 	case app.ExtensionPrintEvent:
 		// Extension output — route through styled renderers when a level is set.
 		switch msg.Level {
@@ -870,6 +901,8 @@ func (m *AppModel) handleSlashCommand(sc *SlashCommand) tea.Cmd {
 		return m.printUsageMessage()
 	case "/reset-usage":
 		return m.printResetUsage()
+	case "/compact":
+		return m.handleCompactCommand("")
 	case "/clear":
 		if m.appCtrl != nil {
 			m.appCtrl.ClearMessages()
@@ -987,6 +1020,7 @@ func (m *AppModel) printHelpMessage() tea.Cmd {
 		"- `/fork`: Branch from an earlier message\n" +
 		"- `/new`: Start a new branch (preserves history)\n\n" +
 		"**System:**\n" +
+		"- `/compact [instructions]`: Summarise older messages to free context space\n" +
 		"- `/clear`: Clear message history\n" +
 		"- `/reset-usage`: Reset usage statistics\n" +
 		"- `/quit`: Exit the application\n\n"
@@ -1080,6 +1114,54 @@ func (m *AppModel) printResetUsage() tea.Cmd {
 	return m.printSystemMessage("Usage statistics have been reset.")
 }

+// handleCompactCommand asks the app controller to compact the conversation,
+// forwarding customInstructions (optional extra text for the summary prompt,
+// e.g. "Focus on the API design decisions") unchanged.
+//
+// If no controller is attached, or the controller rejects the request, the
+// returned command only prints an explanatory system message and the model
+// state is left as it was. Otherwise the model switches to stateWorking
+// right away and the returned command batches a "Compacting conversation..."
+// notice with whatever command the stream component produces for the
+// spinner-on event.
+func (m *AppModel) handleCompactCommand(customInstructions string) tea.Cmd {
+	ctrl := m.appCtrl
+	if ctrl == nil {
+		return m.printSystemMessage("Compaction is not available.")
+	}
+
+	err := ctrl.CompactConversation(customInstructions)
+	if err != nil {
+		reason := fmt.Sprintf("Cannot compact: %v", err)
+		return m.printSystemMessage(reason)
+	}
+
+	// Enter the working state before returning so the spinner is shown for
+	// as long as the compaction is running.
+	m.state = stateWorking
+
+	var startSpinner tea.Cmd
+	if m.stream != nil {
+		_, startSpinner = m.stream.Update(app.SpinnerEvent{Show: true})
+	}
+
+	notice := m.printSystemMessage("Compacting conversation...")
+	return tea.Batch(notice, startSpinner)
+}
+
+// printCompactResult renders the compaction summary in a content block with
+// a theme.Secondary border, followed by a muted stats line, and returns the
+// tea.Println command that emits the rendered block.
+//
+// Token figures are shown in whole thousands using integer division, so
+// counts below 1000 display as 0k.
+func (m *AppModel) printCompactResult(evt app.CompactCompleteEvent) tea.Cmd {
+	theme := GetTheme()
+
+	// Guard against a compacted size that exceeds the original: never
+	// report a negative number of freed tokens. The raw before/after
+	// figures are still shown in the parentheses.
+	saved := evt.OriginalTokens - evt.CompactedTokens
+	if saved < 0 {
+		saved = 0
+	}
+	stats := fmt.Sprintf(
+		"%d messages summarised, ~%dk tokens freed (%dk -> %dk)",
+		evt.MessagesRemoved, saved/1000, evt.OriginalTokens/1000, evt.CompactedTokens/1000,
+	)
+
+	// The stats line is always non-empty, so it is appended unconditionally.
+	// One trailing newline is dropped from the summary before the blank
+	// separator line is added.
+	sub := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render(" " + stats)
+	content := strings.TrimSuffix(evt.Summary, "\n") + "\n\n" + sub
+
+	rendered := renderContentBlock(
+		content,
+		m.width,
+		WithAlign(lipgloss.Left),
+		WithBorderColor(theme.Secondary),
+		WithMarginBottom(1),
+	)
+	return tea.Println(rendered)
+}
+
 // flushStreamContent gets the rendered content from the stream component,
 // emits it above the BT region via tea.Println, and resets the stream. This
 // is called before printing tool calls (streaming completes before tools fire)
@@ -45,6 +45,10 @@ func (s *stubAppController) ClearMessages() {
 	s.clearMsgCalled++
 }

+func (s *stubAppController) CompactConversation(_ string) error {
+	return nil // stub: ignores the instructions and always reports success
+}
+
 func (s *stubAppController) GetTreeSession() *session.TreeManager {
 	return nil // stub: no tree session is provided
 }
@@ -23,21 +23,22 @@ func (m *Kit) EstimateContextTokens() int {
 }

 // ShouldCompact reports whether the conversation is near the model's context
-// limit and should be compacted. Returns false if the model's context limit
-// is unknown or if no compaction options are configured.
+// limit and should be compacted. Uses Pi's formula:
+// contextTokens > contextWindow - reserveTokens (reserve defaults to 16384).
+// Returns false if model info is unavailable or its context limit is <= 0.
 func (m *Kit) ShouldCompact() bool {
 	info := m.GetModelInfo()
 	if info == nil || info.Limit.Context <= 0 {
 		return false
 	}

-	threshold := 0.8
-	if m.compactionOpts != nil && m.compactionOpts.ThresholdPct > 0 {
-		threshold = m.compactionOpts.ThresholdPct
+	reserveTokens := 16384 // default, used unless a positive ReserveTokens is configured
+	if m.compactionOpts != nil && m.compactionOpts.ReserveTokens > 0 {
+		reserveTokens = m.compactionOpts.ReserveTokens
 	}

 	messages := m.treeSession.GetFantasyMessages()
-	return compaction.ShouldCompact(messages, info.Limit.Context, threshold)
+	return compaction.ShouldCompact(messages, info.Limit.Context, reserveTokens)
 }

 // GetContextStats returns current context usage statistics including
@@ -61,13 +62,16 @@ func (m *Kit) GetContextStats() ContextStats {
 }

 // Compact summarises older messages to reduce context usage. If opts is nil,
-// the instance's CompactionOptions (or sensible defaults) are used. The model's
-// context limit is automatically populated from the model registry when
-// opts.ContextLimit is 0.
+// the instance's CompactionOptions (or sensible defaults) are used. The
+// model's context window is automatically populated from the model registry
+// when opts.ContextWindow is 0.
 //
-// After compaction, the tree session is cleared and replaced with the compacted
-// messages (summary + preserved recent messages).
-func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*CompactionResult, error) {
+// customInstructions is optional text appended to the summary prompt (e.g.
+// "Focus on the API design decisions"). Pass "" for the default prompt.
+//
+// After compaction, the tree session is cleared and replaced with the
+// compacted messages (summary + preserved recent messages).
+func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions, customInstructions string) (*CompactionResult, error) {
 	if opts == nil {
 		if m.compactionOpts != nil {
 			opts = m.compactionOpts
@@ -76,25 +80,24 @@ func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*Compaction
 		}
 	}

-	// Auto-populate context limit from model info if not set.
-	if opts.ContextLimit <= 0 {
+	// Auto-populate context window from model info if not set.
+	if opts.ContextWindow <= 0 {
 		if info := m.GetModelInfo(); info != nil {
-			opts.ContextLimit = info.Limit.Context
+			opts.ContextWindow = info.Limit.Context
 		}
 	}

 	messages := m.treeSession.GetFantasyMessages()
-	if len(messages) == 0 {
-		return nil, fmt.Errorf("cannot compact: no messages in session")
+	if len(messages) < 2 {
+		return nil, fmt.Errorf("cannot compact: need at least 2 messages")
 	}

 	model := m.agent.GetModel()
-	result, newMessages, err := compaction.Compact(ctx, model, messages, *opts)
+	result, newMessages, err := compaction.Compact(ctx, model, messages, *opts, customInstructions)
 	if err != nil {
 		return nil, err
 	}
 	if result == nil {
-		// Nothing to compact (too few messages).
 		return nil, nil
 	}

@@ -540,7 +540,7 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr

 	// Auto-compact if enabled and conversation is near the context limit.
 	if m.autoCompact && m.ShouldCompact() {
-		_, _ = m.Compact(ctx, m.compactionOpts) // best-effort
+		_, _ = m.Compact(ctx, m.compactionOpts, "") // best-effort
 	}

 	// Build context from the tree so only the current branch is sent.