add /compact command with Pi-style token-based compaction

Rework compaction to match Pi's method:
- Token-based cut point (KeepRecentTokens=20k) instead of fixed message count
- Auto-trigger: contextTokens > contextWindow - reserveTokens (16k default)
- Pi's structured summary prompt (Goal/Progress/Decisions/Next Steps format)
- /compact [instructions] supports custom focus text
- Force compaction on manual request (only gate: >= 2 messages)
- Summary displayed in styled block with sky/cyan border and token stats
- Spinner properly animated during compaction
This commit is contained in:
Ed Zynda
2026-02-27 17:05:25 +03:00
parent 3804daa6fa
commit 215a3186ff
9 changed files with 597 additions and 131 deletions
+59
View File
@@ -169,6 +169,65 @@ func (a *App) GetTreeSession() *session.TreeManager {
return a.opts.TreeSession
}
// CompactConversation kicks off a summarisation of older messages so that
// context space is freed.
//
// The call itself only validates that compaction may start: it fails
// synchronously when the app is closed, when the agent is already busy, or
// when no SDK instance is configured. On success the app is marked busy and
// the work continues in a background goroutine tracked by the app's wait
// group; its outcome is reported through sendEvent as either a
// CompactCompleteEvent or a CompactErrorEvent.
//
// customInstructions is optional text forwarded to the summariser (e.g.
// "Focus on the API design decisions").
//
// Satisfies ui.AppController.
func (a *App) CompactConversation(customInstructions string) error {
	a.mu.Lock()
	var startErr error
	switch {
	case a.closed:
		startErr = fmt.Errorf("app is closed")
	case a.busy:
		startErr = fmt.Errorf("cannot compact while the agent is working")
	case a.opts.Kit == nil:
		startErr = fmt.Errorf("SDK instance not available")
	}
	if startErr != nil {
		a.mu.Unlock()
		return startErr
	}

	// Claim the busy flag and register the worker while still holding the lock
	// so no other operation can start in between.
	a.busy = true
	a.wg.Add(1)
	a.mu.Unlock()

	go func() {
		// Clear the busy flag first, then release the wait group.
		defer func() {
			a.mu.Lock()
			a.busy = false
			a.mu.Unlock()
			a.wg.Done()
		}()

		result, err := a.opts.Kit.Compact(a.rootCtx, nil, customInstructions)
		switch {
		case err != nil:
			a.sendEvent(CompactErrorEvent{Err: err})
			return
		case result == nil:
			a.sendEvent(CompactErrorEvent{Err: fmt.Errorf("nothing to compact")})
			return
		}

		// Sync in-memory store with the compacted session.
		if ts := a.opts.TreeSession; ts != nil {
			a.store.Replace(ts.GetFantasyMessages())
		}

		done := CompactCompleteEvent{
			Summary:         result.Summary,
			OriginalTokens:  result.OriginalTokens,
			CompactedTokens: result.CompactedTokens,
			MessagesRemoved: result.MessagesRemoved,
		}
		a.sendEvent(done)
	}()

	return nil
}
// --------------------------------------------------------------------------
// Non-interactive execution
// --------------------------------------------------------------------------
+19
View File
@@ -94,6 +94,25 @@ type MessageCreatedEvent struct {
Message fantasy.Message
}
// CompactCompleteEvent is sent when a /compact operation finishes successfully.
// It carries the summary text and before/after statistics. Both token counts
// are estimates rather than exact measurements.
type CompactCompleteEvent struct {
// Summary is the LLM-generated structured summary of the compacted messages.
Summary string
// OriginalTokens is the estimated token count before compaction.
OriginalTokens int
// CompactedTokens is the estimated token count after compaction.
CompactedTokens int
// MessagesRemoved is the number of messages that were summarised away.
MessagesRemoved int
}
// CompactErrorEvent is sent when a /compact operation fails. It is also sent,
// with a "nothing to compact" error, when compaction ran but produced no
// result.
type CompactErrorEvent struct {
// Err is the error that caused compaction to fail.
Err error
}
// ExtensionPrintEvent is sent when an extension calls ctx.Print, ctx.PrintInfo,
// ctx.PrintError, or ctx.PrintBlock. The TUI renders it via the appropriate
// renderer and tea.Println (scrollback); the CLI handler uses
+240 -70
View File
@@ -1,5 +1,10 @@
// Package compaction provides context window management with token estimation,
// compaction triggers, and LLM-based conversation summarization.
//
// The algorithm mirrors Pi's approach: preserve a token budget of recent
// messages (KeepRecentTokens, default 20 000) rather than a fixed message
// count. Auto-compaction fires when estimated context usage exceeds
// contextWindow - ReserveTokens.
package compaction
import (
@@ -10,36 +15,55 @@ import (
"charm.land/fantasy"
)
// ---------------------------------------------------------------------------
// Token estimation
// ---------------------------------------------------------------------------
// EstimateTokens returns a coarse token estimate for text, assuming roughly
// four bytes of input per token. The result is rounded down, so inputs
// shorter than four bytes estimate to zero.
func EstimateTokens(text string) int {
	const bytesPerToken = 4
	return len(text) / bytesPerToken
}
// EstimateMessageTokens estimates total tokens across a slice of fantasy messages
// by summing the estimated tokens for every text part.
// EstimateMessageTokens estimates total tokens across a slice of fantasy
// messages by summing the estimated tokens for every text part.
func EstimateMessageTokens(messages []fantasy.Message) int {
total := 0
for _, msg := range messages {
for _, part := range msg.Content {
if tp, ok := part.(fantasy.TextPart); ok {
total += EstimateTokens(tp.Text)
}
total += estimateSingleMessageTokens(msg)
}
return total
}
// estimateSingleMessageTokens estimates the token count of a single message.
// Only fantasy.TextPart content is counted; any other part kind contributes
// nothing to the estimate.
func estimateSingleMessageTokens(msg fantasy.Message) int {
	sum := 0
	for _, p := range msg.Content {
		textPart, isText := p.(fantasy.TextPart)
		if !isText {
			continue
		}
		sum += EstimateTokens(textPart.Text)
	}
	return sum
}
// ShouldCompact reports whether the conversation exceeds the threshold
// percentage of the context limit. thresholdPct should be in the range 0.0–1.0
// (e.g. 0.8 means 80%).
func ShouldCompact(messages []fantasy.Message, contextLimit int, thresholdPct float64) bool {
if contextLimit <= 0 || thresholdPct <= 0 {
// ---------------------------------------------------------------------------
// Auto-compact trigger
// ---------------------------------------------------------------------------
// ShouldCompact reports whether auto-compaction should fire. It uses Pi's
// formula: contextTokens > contextWindow - reserveTokens.
func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool {
if contextWindow <= 0 || reserveTokens <= 0 {
return false
}
estimated := EstimateMessageTokens(messages)
return float64(estimated) >= float64(contextLimit)*thresholdPct
return estimated > contextWindow-reserveTokens
}
// ---------------------------------------------------------------------------
// Options & defaults
// ---------------------------------------------------------------------------
// CompactionResult contains statistics from a compaction operation.
type CompactionResult struct {
Summary string // LLM-generated summary of compacted messages
@@ -48,74 +72,170 @@ type CompactionResult struct {
MessagesRemoved int // Number of messages replaced by the summary
}
// CompactionOptions configures compaction behaviour.
// CompactionOptions configures compaction behaviour. Pi-style token-based
// defaults are applied for zero-value fields.
type CompactionOptions struct {
ContextLimit int // Model's context window size (tokens)
ThresholdPct float64 // Trigger threshold (0.01.0), default 0.8
PreserveRecent int // Number of recent messages to keep, default 10
SummaryPrompt string // Custom summary prompt (empty = use default)
ContextWindow int // Model's context window size (tokens)
ReserveTokens int // Tokens to reserve for LLM response, default 16384
KeepRecentTokens int // Recent tokens to preserve (not summarised), default 20000
SummaryPrompt string // Custom summary prompt (empty = use default)
}
// defaults fills zero-value fields with sensible defaults.
// defaults fills zero-value fields with sensible Pi-style defaults.
func (o *CompactionOptions) defaults() {
if o.ThresholdPct <= 0 {
o.ThresholdPct = 0.8
if o.ReserveTokens <= 0 {
o.ReserveTokens = 16384
}
if o.PreserveRecent <= 0 {
o.PreserveRecent = 10
if o.KeepRecentTokens <= 0 {
o.KeepRecentTokens = 20000
}
}
// defaultSummaryPrompt is the system prompt used to summarise older messages.
const defaultSummaryPrompt = `You are a conversation summarizer. Summarize the following conversation messages into a concise summary that preserves:
1. Key decisions and conclusions reached
2. Important context and facts established
3. Current task state and progress
4. Any pending actions or open questions
// defaultSystemPrompt is the system prompt sent to the summarisation LLM.
// Matches Pi's compaction system prompt.
const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.
Be concise but thorough. Output only the summary text, no preamble.`
Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`
// FindCutPoint determines the index at which to cut messages for compaction.
// Messages before the cut point will be summarised; messages from the cut
// point onward are preserved. Returns 0 if no compaction is needed.
func FindCutPoint(messages []fantasy.Message, preserveRecent int) int {
if preserveRecent <= 0 {
preserveRecent = 10
}
if len(messages) <= preserveRecent {
return 0 // not enough messages to compact
}
return len(messages) - preserveRecent
// defaultSummaryPrompt is the user prompt appended after the serialised
// conversation. Matches Pi's initial-compaction format.
const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work.
Use this EXACT format:
## Goal
[What is the user trying to accomplish? Can be multiple items if the session covers different tasks.]
## Constraints & Preferences
- [Any constraints, preferences, or requirements mentioned by user]
- [Or "(none)" if none were mentioned]
## Progress
### Done
- [x] [Completed tasks/changes]
### In Progress
- [ ] [Current work]
### Blocked
- [Issues preventing progress, if any]
## Key Decisions
- **[Decision]**: [Brief rationale]
## Next Steps
1. [Ordered list of what should happen next]
## Critical Context
- [Any data, examples, or references needed to continue]
- [Or "(none)" if not applicable]
Keep each section concise. Preserve exact file paths, function names, and error messages.`
// ---------------------------------------------------------------------------
// Cut point (token-based, Pi-style)
// ---------------------------------------------------------------------------
// isValidCutPoint reports whether the conversation may be split immediately
// before msg. Tool-role messages (tool results) have to stay attached to the
// assistant tool call that precedes them, so a cut may never start on one;
// every other role is acceptable.
func isValidCutPoint(msg fantasy.Message) bool {
	switch msg.Role {
	case fantasy.MessageRoleTool:
		return false
	default:
		return true
	}
}
// Compact summarises older messages using the LLM, returning the compaction
// result and a new message slice (summary message + preserved recent messages).
// FindCutPoint walks backward from the end of messages, accumulating tokens
// until the keepRecentTokens budget is filled. Returns the index that
// separates "old" messages (0..cutPoint-1, to be summarised) from "recent"
// messages (cutPoint..end, to be preserved).
//
// The model parameter is the same fantasy.LanguageModel used for regular
// generation — compaction creates a disposable fantasy agent with no tools to
// produce the summary.
func Compact(
ctx context.Context,
model fantasy.LanguageModel,
messages []fantasy.Message,
opts CompactionOptions,
) (*CompactionResult, []fantasy.Message, error) {
opts.defaults()
cutPoint := FindCutPoint(messages, opts.PreserveRecent)
if cutPoint == 0 {
return nil, messages, nil // nothing to compact
// Returns 0 if there are fewer than 2 messages or all messages fit within
// the keep budget.
func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int {
if len(messages) < 2 {
return 0
}
if keepRecentTokens <= 0 {
keepRecentTokens = 20000
}
oldMessages := messages[:cutPoint]
recentMessages := messages[cutPoint:]
originalTokens := EstimateMessageTokens(messages)
accumulated := 0
// Build a textual representation of the messages to summarise.
for i := len(messages) - 1; i >= 0; i-- {
accumulated += estimateSingleMessageTokens(messages[i])
if accumulated > keepRecentTokens {
cut := i + 1
// If the last message alone exceeds the budget, keep it
// anyway and summarise everything before it.
if cut >= len(messages) {
cut = len(messages) - 1
}
// Land on a valid cut point — scan forward past tool-result
// messages (they must stay with their preceding tool call).
for cut < len(messages) && !isValidCutPoint(messages[cut]) {
cut++
}
if cut >= len(messages) {
return 0
}
// Need at least 2 messages before the cut to produce a
// meaningful summary.
if cut < 2 {
return 0
}
return cut
}
}
// All messages fit within the budget — nothing to compact.
return 0
}
// forceCutPoint picks a cut point for a forced (manual /compact) compaction,
// used when the budget-based FindCutPoint reports 0 but the caller still
// wants to compact.
//
// It returns the index of the last non-tool message, so that message and
// anything after it are kept while everything before it is summarised. The
// index is never below 2, meaning at least two messages always precede the
// cut. Returns 0 when no such index exists.
func forceCutPoint(messages []fantasy.Message) int {
	const minCut = 2

	// Step backward over trailing tool results until a valid boundary is hit
	// or the minimum cut index is passed.
	idx := len(messages) - 1
	for idx >= minCut && !isValidCutPoint(messages[idx]) {
		idx--
	}
	if idx < minCut {
		return 0
	}
	return idx
}
// ---------------------------------------------------------------------------
// Message serialisation (Pi-style)
// ---------------------------------------------------------------------------
// roleLabel maps a fantasy message role to the bracketed, human-readable tag
// used when serialising a conversation for the summariser (Pi's format).
// Roles without a dedicated tag fall back to the raw role string wrapped in
// square brackets.
func roleLabel(role fantasy.MessageRole) string {
	var label string
	switch role {
	case fantasy.MessageRoleUser:
		label = "[User]"
	case fantasy.MessageRoleAssistant:
		label = "[Assistant]"
	case fantasy.MessageRoleTool:
		label = "[Tool result]"
	case fantasy.MessageRoleSystem:
		label = "[System]"
	default:
		label = "[" + string(role) + "]"
	}
	return label
}
// serializeMessages converts a slice of fantasy messages into a plain-text
// representation suitable for sending to the summarisation LLM. The format
// mirrors Pi's compaction serialisation.
func serializeMessages(messages []fantasy.Message) string {
var sb strings.Builder
for _, msg := range oldMessages {
sb.WriteString(string(msg.Role))
sb.WriteString(": ")
for _, msg := range messages {
sb.WriteString(roleLabel(msg.Role))
sb.WriteString(":\n")
for _, part := range msg.Content {
if tp, ok := part.(fantasy.TextPart); ok {
sb.WriteString(tp.Text)
@@ -123,20 +243,70 @@ func Compact(
}
sb.WriteString("\n\n")
}
conversationText := sb.String()
return sb.String()
}
// Use the provided (or default) summary prompt.
summaryPrompt := opts.SummaryPrompt
if summaryPrompt == "" {
summaryPrompt = defaultSummaryPrompt
// ---------------------------------------------------------------------------
// Compact
// ---------------------------------------------------------------------------
// Compact summarises older messages using the LLM, returning the compaction
// result and a new message slice (summary message + preserved recent
// messages).
//
// The model parameter is the same fantasy.LanguageModel used for regular
// generation — compaction creates a disposable fantasy agent with no tools to
// produce the summary.
//
// customInstructions is optional text appended to the summary prompt (e.g.
// "Focus on the API design decisions"). Pass "" to use the default prompt
// only.
func Compact(
ctx context.Context,
model fantasy.LanguageModel,
messages []fantasy.Message,
opts CompactionOptions,
customInstructions string,
) (*CompactionResult, []fantasy.Message, error) {
opts.defaults()
if len(messages) < 2 {
return nil, messages, nil
}
cutPoint := FindCutPoint(messages, opts.KeepRecentTokens)
if cutPoint == 0 {
// All messages fit within the keep budget. Force a cut that
// keeps only the last non-tool message — matching Pi, which
// always compacts when the user explicitly requests it.
cutPoint = forceCutPoint(messages)
if cutPoint == 0 {
return nil, messages, nil
}
}
oldMessages := messages[:cutPoint]
recentMessages := messages[cutPoint:]
originalTokens := EstimateMessageTokens(messages)
// Serialise old messages to text, matching Pi's format.
conversationText := serializeMessages(oldMessages)
// Build the user-facing prompt: conversation text + summary instructions.
userPrompt := opts.SummaryPrompt
if userPrompt == "" {
userPrompt = defaultSummaryPrompt
}
if customInstructions != "" {
userPrompt += "\n\nAdditional instructions: " + customInstructions
}
// Create a lightweight agent (no tools) just for summarisation.
summaryAgent := fantasy.NewAgent(model,
fantasy.WithSystemPrompt(summaryPrompt),
fantasy.WithSystemPrompt(defaultSystemPrompt),
)
result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{
Prompt: conversationText,
Prompt: conversationText + "\n\n" + userPrompt,
})
if err != nil {
return nil, nil, fmt.Errorf("compaction summarisation failed: %w", err)
+164 -41
View File
@@ -2,6 +2,7 @@ package compaction
import (
"context"
"strings"
"testing"
"charm.land/fantasy"
@@ -14,6 +15,16 @@ func makeTextMessage(role fantasy.MessageRole, text string) fantasy.Message {
}
}
// makeTextMessageN builds a message for the given role whose text is the
// letter "a" repeated n times, which EstimateTokens counts as n/4 tokens.
func makeTextMessageN(role fantasy.MessageRole, n int) fantasy.Message {
	body := strings.Repeat("a", n)
	return makeTextMessage(role, body)
}
// ---------------------------------------------------------------------------
// Token estimation
// ---------------------------------------------------------------------------
func TestEstimateTokens(t *testing.T) {
tests := []struct {
text string
@@ -51,29 +62,30 @@ func TestEstimateMessageTokens_Empty(t *testing.T) {
}
}
// ---------------------------------------------------------------------------
// ShouldCompact (Pi-style: contextTokens > contextWindow - reserveTokens)
// ---------------------------------------------------------------------------
func TestShouldCompact(t *testing.T) {
// Create messages that total ~100 tokens (400 chars).
longText := make([]byte, 400)
for i := range longText {
longText[i] = 'a'
}
msgs := []fantasy.Message{makeTextMessage(fantasy.MessageRoleUser, string(longText))}
msgs := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)}
tests := []struct {
name string
contextLimit int
threshold float64
want bool
name string
contextWindow int
reserveTokens int
want bool
}{
{"above threshold", 120, 0.8, true}, // 100 >= 120*0.8=96
{"below threshold", 200, 0.8, false}, // 100 < 200*0.8=160
{"zero limit", 0, 0.8, false}, // no limit
{"zero threshold", 200, 0.0, false}, // no threshold
{"exactly at threshold", 125, 0.8, true}, // 100 >= 125*0.8=100
{"above threshold", 110, 16, true}, // 100 > 110-16=94 → true
{"below threshold", 200, 16, false}, // 100 > 200-16=184 → false
{"zero window", 0, 16, false}, // no window
{"zero reserve", 200, 0, false}, // no reserve
{"exactly at threshold", 116, 16, false}, // 100 > 116-16=100 → false (not >)
{"one over", 115, 16, true}, // 100 > 115-16=99 → true
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := ShouldCompact(msgs, tt.contextLimit, tt.threshold)
got := ShouldCompact(msgs, tt.contextWindow, tt.reserveTokens)
if got != tt.want {
t.Errorf("ShouldCompact() = %v, want %v", got, tt.want)
}
@@ -81,26 +93,41 @@ func TestShouldCompact(t *testing.T) {
}
}
func TestFindCutPoint(t *testing.T) {
msgs := make([]fantasy.Message, 20)
// ---------------------------------------------------------------------------
// FindCutPoint (token-based, Pi-style)
// ---------------------------------------------------------------------------
func TestFindCutPoint_TokenBased(t *testing.T) {
// Each message is 400 chars = ~100 tokens.
msgs := make([]fantasy.Message, 10)
for i := range msgs {
msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "msg")
if i%2 == 0 {
msgs[i] = makeTextMessageN(fantasy.MessageRoleUser, 400)
} else {
msgs[i] = makeTextMessageN(fantasy.MessageRoleAssistant, 400)
}
}
tests := []struct {
name string
preserveRecent int
want int
name string
keepRecentTokens int
want int // expected cut point
}{
{"preserve 10", 10, 10},
{"preserve 5", 5, 15},
{"preserve all", 20, 0},
{"preserve more than total", 25, 0},
{"preserve 0 uses default 10", 0, 10},
// keepRecentTokens=250 → walk back: msg[9]=100, msg[8]=200 ≤ 250,
// msg[7]=300 > 250 → cut = 8.
{"keep 250 tokens", 250, 8},
// keepRecentTokens=500 → walk back 5 msgs = 500 ≤ 500,
// 6th msg = 600 > 500 → cut = 5.
{"keep 500 tokens", 500, 5},
// keepRecentTokens=1000 → all 10 msgs = 1000, not exceeded → cut = 0.
{"keep all", 1000, 0},
// keepRecentTokens=50 → msg[9] alone = 100 > 50 → cut = 10,
// exceeds len → clamped to 9. 9 ≥ 2 → valid.
{"keep very few", 50, 9},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := FindCutPoint(msgs, tt.preserveRecent)
got := FindCutPoint(msgs, tt.keepRecentTokens)
if got != tt.want {
t.Errorf("FindCutPoint() = %d, want %d", got, tt.want)
}
@@ -108,38 +135,115 @@ func TestFindCutPoint(t *testing.T) {
}
}
// TestFindCutPoint_TooFewMessages verifies that a single-message conversation
// yields no cut point even when that message exceeds the keep budget.
func TestFindCutPoint_TooFewMessages(t *testing.T) {
	single := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)}

	if cut := FindCutPoint(single, 50); cut != 0 {
		t.Errorf("FindCutPoint(1 msg) = %d, want 0", cut)
	}
}
// TestFindCutPoint_SkipsToolResults checks that FindCutPoint never returns an
// index pointing at a tool-result message. Every message is 400 chars, i.e.
// ~100 estimated tokens.
func TestFindCutPoint_SkipsToolResults(t *testing.T) {
	// Raw cut already lands on a non-tool message.
	// [user(0), assistant(1), tool(2), user(3), assistant(4)], budget 150:
	//   msg[4] → 100 ≤ 150
	//   msg[3] → 200 > 150 → raw cut = 4 (assistant) → valid, cut = 4.
	msgs := []fantasy.Message{
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
	}
	got := FindCutPoint(msgs, 150)
	if got != 4 {
		t.Errorf("FindCutPoint() = %d, want 4", got)
	}

	// The budget is exceeded while walking over a tool result, but the raw
	// cut (i+1) is the user message that follows it.
	// [user(0), assistant(1), tool(2), tool(3), user(4)], budget 150:
	//   msg[4] → 100 ≤ 150
	//   msg[3] → 200 > 150 → raw cut = 4 (user) → valid, cut = 4.
	msgs2 := []fantasy.Message{
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleUser, 400),
	}
	got2 := FindCutPoint(msgs2, 150)
	if got2 != 4 {
		t.Errorf("FindCutPoint(tool results) = %d, want 4", got2)
	}

	// The raw cut lands ON a tool message and must scan forward.
	// [user(0), assistant(1), tool(2), tool(3), user(4), assistant(5)],
	// budget 350:
	//   msg[5] → 100 ≤ 350
	//   msg[4] → 200 ≤ 350
	//   msg[3] → 300 ≤ 350
	//   msg[2] → 400 > 350 → raw cut = 3 (tool) → scan forward → cut = 4.
	msgs3 := []fantasy.Message{
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
	}
	got3 := FindCutPoint(msgs3, 350)
	if got3 != 4 {
		t.Errorf("FindCutPoint(scan forward) = %d, want 4", got3)
	}

	// The forward scan runs off the end: only tool results follow the raw
	// cut, so there is no valid split and FindCutPoint reports 0.
	// [user(0), assistant(1), tool(2), tool(3)], budget 150:
	//   msg[3] → 100 ≤ 150
	//   msg[2] → 200 > 150 → raw cut = 3 (tool) → scan reaches len → 0.
	msgs4 := []fantasy.Message{
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
		makeTextMessageN(fantasy.MessageRoleTool, 400),
	}
	got4 := FindCutPoint(msgs4, 150)
	if got4 != 0 {
		t.Errorf("FindCutPoint(trailing tool results) = %d, want 0", got4)
	}
}
// ---------------------------------------------------------------------------
// CompactionOptions defaults
// ---------------------------------------------------------------------------
func TestCompactionOptions_Defaults(t *testing.T) {
opts := CompactionOptions{}
opts.defaults()
if opts.ThresholdPct != 0.8 {
t.Errorf("ThresholdPct = %f, want 0.8", opts.ThresholdPct)
if opts.ReserveTokens != 16384 {
t.Errorf("ReserveTokens = %d, want 16384", opts.ReserveTokens)
}
if opts.PreserveRecent != 10 {
t.Errorf("PreserveRecent = %d, want 10", opts.PreserveRecent)
if opts.KeepRecentTokens != 20000 {
t.Errorf("KeepRecentTokens = %d, want 20000", opts.KeepRecentTokens)
}
}
func TestCompactionOptions_DefaultsPreservesExisting(t *testing.T) {
opts := CompactionOptions{ThresholdPct: 0.9, PreserveRecent: 5}
opts := CompactionOptions{ReserveTokens: 8192, KeepRecentTokens: 10000}
opts.defaults()
if opts.ThresholdPct != 0.9 {
t.Errorf("ThresholdPct = %f, want 0.9", opts.ThresholdPct)
if opts.ReserveTokens != 8192 {
t.Errorf("ReserveTokens = %d, want 8192", opts.ReserveTokens)
}
if opts.PreserveRecent != 5 {
t.Errorf("PreserveRecent = %d, want 5", opts.PreserveRecent)
if opts.KeepRecentTokens != 10000 {
t.Errorf("KeepRecentTokens = %d, want 10000", opts.KeepRecentTokens)
}
}
// ---------------------------------------------------------------------------
// Compact (integration — too few messages)
// ---------------------------------------------------------------------------
func TestCompact_TooFewMessages(t *testing.T) {
msgs := make([]fantasy.Message, 5)
for i := range msgs {
msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "short")
msgs := []fantasy.Message{
makeTextMessageN(fantasy.MessageRoleUser, 400),
}
// Default preserveRecent = 10, so 5 messages is too few.
result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{})
result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -150,3 +254,22 @@ func TestCompact_TooFewMessages(t *testing.T) {
t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs))
}
}
// TestCompact_WithinBudget verifies that Compact is a no-op for a two-message
// conversation that fits inside the default keep budget: FindCutPoint finds
// no cut, and forceCutPoint needs a cut index of at least 2, which a
// two-message history cannot provide. The nil model is therefore never used.
func TestCompact_WithinBudget(t *testing.T) {
	// 2 messages of ~100 tokens each, far below the default 20000 budget.
	history := []fantasy.Message{
		makeTextMessageN(fantasy.MessageRoleUser, 400),
		makeTextMessageN(fantasy.MessageRoleAssistant, 400),
	}

	result, newMsgs, err := Compact(context.TODO(), nil, history, CompactionOptions{}, "")
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result != nil {
		t.Error("expected nil result when all messages fit within budget")
	}
	if want := len(history); len(newMsgs) != want {
		t.Errorf("messages changed: got %d, want %d", len(newMsgs), want)
	}
}
+6
View File
@@ -59,6 +59,12 @@ var SlashCommands = []SlashCommand{
Category: "System",
Aliases: []string{"/cq"},
},
{
Name: "/compact",
Description: "Summarise older messages to free context space",
Category: "System",
Aliases: []string{"/co"},
},
{
Name: "/quit",
Description: "Exit the application",
+82
View File
@@ -50,6 +50,12 @@ type AppController interface {
ClearQueue()
// ClearMessages clears the conversation history.
ClearMessages()
// CompactConversation summarises older messages to free context space.
// Runs asynchronously; results are delivered via CompactCompleteEvent or
// CompactErrorEvent sent through the registered tea.Program. Returns an
// error synchronously if compaction cannot be started (e.g. agent is busy).
// customInstructions is optional text appended to the summary prompt.
CompactConversation(customInstructions string) error
// GetTreeSession returns the tree session manager, or nil if tree sessions
// are not enabled. Used by slash commands like /tree, /fork, /session.
GetTreeSession() *session.TreeManager
@@ -497,6 +503,17 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
return m, tea.Batch(cmds...)
}
// /compact supports optional args: "/compact Focus on API decisions".
// GetCommandByName won't match the full text, so check the prefix.
if name, args, ok := strings.Cut(msg.Text, " "); ok {
if sc := GetCommandByName(name); sc != nil && sc.Name == "/compact" {
if cmd := m.handleCompactCommand(strings.TrimSpace(args)); cmd != nil {
cmds = append(cmds, cmd)
}
return m, tea.Batch(cmds...)
}
}
// Check extension-registered slash commands. These support arguments
// (e.g. "/sub list files"), so we split on the first space.
if cmd := m.handleExtensionCommand(msg.Text); cmd != nil {
@@ -636,6 +653,20 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.state = stateInput
m.canceling = false
case app.CompactCompleteEvent:
if m.stream != nil {
m.stream.Reset()
}
m.state = stateInput
cmds = append(cmds, m.printCompactResult(msg))
case app.CompactErrorEvent:
if m.stream != nil {
m.stream.Reset()
}
m.state = stateInput
cmds = append(cmds, m.printSystemMessage(fmt.Sprintf("Compaction failed: %v", msg.Err)))
case app.ExtensionPrintEvent:
// Extension output — route through styled renderers when a level is set.
switch msg.Level {
@@ -870,6 +901,8 @@ func (m *AppModel) handleSlashCommand(sc *SlashCommand) tea.Cmd {
return m.printUsageMessage()
case "/reset-usage":
return m.printResetUsage()
case "/compact":
return m.handleCompactCommand("")
case "/clear":
if m.appCtrl != nil {
m.appCtrl.ClearMessages()
@@ -987,6 +1020,7 @@ func (m *AppModel) printHelpMessage() tea.Cmd {
"- `/fork`: Branch from an earlier message\n" +
"- `/new`: Start a new branch (preserves history)\n\n" +
"**System:**\n" +
"- `/compact [instructions]`: Summarise older messages to free context space\n" +
"- `/clear`: Clear message history\n" +
"- `/reset-usage`: Reset usage statistics\n" +
"- `/quit`: Exit the application\n\n"
@@ -1080,6 +1114,54 @@ func (m *AppModel) printResetUsage() tea.Cmd {
return m.printSystemMessage("Usage statistics have been reset.")
}
// handleCompactCommand asks the app controller to start an asynchronous
// compaction and returns the tea.Cmd to run in response.
//
// When no controller is attached, or the controller refuses the request, the
// returned command prints an explanatory system message and the model state
// is left untouched. Otherwise the model switches to the working state, the
// stream component (if any) is told to show its spinner, and the returned
// command batches a "Compacting conversation..." notice with the spinner
// command. customInstructions is optional text passed through to the
// controller (e.g. "Focus on the API design decisions").
func (m *AppModel) handleCompactCommand(customInstructions string) tea.Cmd {
	if m.appCtrl == nil {
		return m.printSystemMessage("Compaction is not available.")
	}

	err := m.appCtrl.CompactConversation(customInstructions)
	if err != nil {
		reason := fmt.Sprintf("Cannot compact: %v", err)
		return m.printSystemMessage(reason)
	}

	// Transition to working state so the spinner shows while compaction runs.
	m.state = stateWorking

	var startSpinner tea.Cmd
	if stream := m.stream; stream != nil {
		_, startSpinner = stream.Update(app.SpinnerEvent{Show: true})
	}

	notice := m.printSystemMessage("Compacting conversation...")
	return tea.Batch(notice, startSpinner)
}
// printCompactResult renders the compaction summary in a styled block and
// returns a command that prints it.
//
// The block contains the summary text followed by a muted stats line showing
// the number of messages summarised and the estimated token counts before and
// after, expressed in whole thousands (integer division, so values below
// 1000 display as 0k). The block is left-aligned, bordered in the theme's
// secondary colour, and sized to the current model width.
func (m *AppModel) printCompactResult(evt app.CompactCompleteEvent) tea.Cmd {
	theme := GetTheme()

	saved := evt.OriginalTokens - evt.CompactedTokens
	subtitle := fmt.Sprintf(
		"%d messages summarised, ~%dk tokens freed (%dk -> %dk)",
		evt.MessagesRemoved, saved/1000, evt.OriginalTokens/1000, evt.CompactedTokens/1000,
	)

	// The stats line is always present (Sprintf never yields an empty string
	// for this format), so it is appended unconditionally after exactly one
	// blank line.
	sub := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render(" " + subtitle)
	content := strings.TrimSuffix(evt.Summary, "\n") + "\n\n" + sub

	rendered := renderContentBlock(
		content,
		m.width,
		WithAlign(lipgloss.Left),
		WithBorderColor(theme.Secondary),
		WithMarginBottom(1),
	)
	return tea.Println(rendered)
}
// flushStreamContent gets the rendered content from the stream component,
// emits it above the BT region via tea.Println, and resets the stream. This
// is called before printing tool calls (streaming completes before tools fire)
+4
View File
@@ -45,6 +45,10 @@ func (s *stubAppController) ClearMessages() {
s.clearMsgCalled++
}
// CompactConversation is a no-op stub: it ignores the custom instructions and
// always reports that compaction started successfully.
func (s *stubAppController) CompactConversation(_ string) error {
return nil
}
func (s *stubAppController) GetTreeSession() *session.TreeManager {
return nil
}
+22 -19
View File
@@ -23,21 +23,22 @@ func (m *Kit) EstimateContextTokens() int {
}
// ShouldCompact reports whether the conversation is near the model's context
// limit and should be compacted. Returns false if the model's context limit
// is unknown or if no compaction options are configured.
// limit and should be compacted. Uses Pi's formula:
// contextTokens > contextWindow - reserveTokens.
// Returns false if the model's context limit is unknown.
func (m *Kit) ShouldCompact() bool {
info := m.GetModelInfo()
if info == nil || info.Limit.Context <= 0 {
return false
}
threshold := 0.8
if m.compactionOpts != nil && m.compactionOpts.ThresholdPct > 0 {
threshold = m.compactionOpts.ThresholdPct
reserveTokens := 16384
if m.compactionOpts != nil && m.compactionOpts.ReserveTokens > 0 {
reserveTokens = m.compactionOpts.ReserveTokens
}
messages := m.treeSession.GetFantasyMessages()
return compaction.ShouldCompact(messages, info.Limit.Context, threshold)
return compaction.ShouldCompact(messages, info.Limit.Context, reserveTokens)
}
// GetContextStats returns current context usage statistics including
@@ -61,13 +62,16 @@ func (m *Kit) GetContextStats() ContextStats {
}
// Compact summarises older messages to reduce context usage. If opts is nil,
// the instance's CompactionOptions (or sensible defaults) are used. The model's
// context limit is automatically populated from the model registry when
// opts.ContextLimit is 0.
// the instance's CompactionOptions (or sensible defaults) are used. The
// model's context window is automatically populated from the model registry
// when opts.ContextWindow is 0.
//
// After compaction, the tree session is cleared and replaced with the compacted
// messages (summary + preserved recent messages).
func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*CompactionResult, error) {
// customInstructions is optional text appended to the summary prompt (e.g.
// "Focus on the API design decisions"). Pass "" for the default prompt.
//
// After compaction, the tree session is cleared and replaced with the
// compacted messages (summary + preserved recent messages).
func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions, customInstructions string) (*CompactionResult, error) {
if opts == nil {
if m.compactionOpts != nil {
opts = m.compactionOpts
@@ -76,25 +80,24 @@ func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*Compaction
}
}
// Auto-populate context limit from model info if not set.
if opts.ContextLimit <= 0 {
// Auto-populate context window from model info if not set.
if opts.ContextWindow <= 0 {
if info := m.GetModelInfo(); info != nil {
opts.ContextLimit = info.Limit.Context
opts.ContextWindow = info.Limit.Context
}
}
messages := m.treeSession.GetFantasyMessages()
if len(messages) == 0 {
return nil, fmt.Errorf("cannot compact: no messages in session")
if len(messages) < 2 {
return nil, fmt.Errorf("cannot compact: need at least 2 messages")
}
model := m.agent.GetModel()
result, newMessages, err := compaction.Compact(ctx, model, messages, *opts)
result, newMessages, err := compaction.Compact(ctx, model, messages, *opts, customInstructions)
if err != nil {
return nil, err
}
if result == nil {
// Nothing to compact (too few messages).
return nil, nil
}
+1 -1
View File
@@ -540,7 +540,7 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr
// Auto-compact if enabled and conversation is near the context limit.
if m.autoCompact && m.ShouldCompact() {
_, _ = m.Compact(ctx, m.compactionOpts) // best-effort
_, _ = m.Compact(ctx, m.compactionOpts, "") // best-effort
}
// Build context from the tree so only the current branch is sent.