mirror of
https://github.com/mark3labs/kit.git
synced 2026-06-14 03:30:26 +00:00
add /compact command with Pi-style token-based compaction
Rework compaction to match Pi's method:
- Token-based cut point (KeepRecentTokens=20k) instead of fixed message count
- Auto-trigger: contextTokens > contextWindow - reserveTokens (16k default)
- Pi's structured summary prompt (Goal/Progress/Decisions/Next Steps format)
- /compact [instructions] supports custom focus text
- Force compaction on manual request (only gate: >= 2 messages)
- Summary displayed in styled block with sky/cyan border and token stats
- Spinner properly animated during compaction
This commit is contained in:
@@ -169,6 +169,65 @@ func (a *App) GetTreeSession() *session.TreeManager {
|
||||
return a.opts.TreeSession
|
||||
}
|
||||
|
||||
// CompactConversation summarises older messages to free context space. It
|
||||
// returns an error synchronously if compaction cannot start (agent busy or
|
||||
// app closed). The actual compaction runs in a background goroutine and
|
||||
// delivers CompactCompleteEvent or CompactErrorEvent through the registered
|
||||
// tea.Program. customInstructions is optional text appended to the summary
|
||||
// prompt (e.g. "Focus on the API design decisions").
|
||||
//
|
||||
// Satisfies ui.AppController.
|
||||
func (a *App) CompactConversation(customInstructions string) error {
|
||||
a.mu.Lock()
|
||||
if a.closed {
|
||||
a.mu.Unlock()
|
||||
return fmt.Errorf("app is closed")
|
||||
}
|
||||
if a.busy {
|
||||
a.mu.Unlock()
|
||||
return fmt.Errorf("cannot compact while the agent is working")
|
||||
}
|
||||
if a.opts.Kit == nil {
|
||||
a.mu.Unlock()
|
||||
return fmt.Errorf("SDK instance not available")
|
||||
}
|
||||
a.busy = true
|
||||
a.wg.Add(1)
|
||||
a.mu.Unlock()
|
||||
|
||||
go func() {
|
||||
defer a.wg.Done()
|
||||
defer func() {
|
||||
a.mu.Lock()
|
||||
a.busy = false
|
||||
a.mu.Unlock()
|
||||
}()
|
||||
|
||||
result, err := a.opts.Kit.Compact(a.rootCtx, nil, customInstructions)
|
||||
if err != nil {
|
||||
a.sendEvent(CompactErrorEvent{Err: err})
|
||||
return
|
||||
}
|
||||
if result == nil {
|
||||
a.sendEvent(CompactErrorEvent{Err: fmt.Errorf("nothing to compact")})
|
||||
return
|
||||
}
|
||||
|
||||
// Sync in-memory store with the compacted session.
|
||||
if a.opts.TreeSession != nil {
|
||||
a.store.Replace(a.opts.TreeSession.GetFantasyMessages())
|
||||
}
|
||||
|
||||
a.sendEvent(CompactCompleteEvent{
|
||||
Summary: result.Summary,
|
||||
OriginalTokens: result.OriginalTokens,
|
||||
CompactedTokens: result.CompactedTokens,
|
||||
MessagesRemoved: result.MessagesRemoved,
|
||||
})
|
||||
}()
|
||||
return nil
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Non-interactive execution
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
@@ -94,6 +94,25 @@ type MessageCreatedEvent struct {
|
||||
Message fantasy.Message
|
||||
}
|
||||
|
||||
// CompactCompleteEvent is sent when a /compact operation finishes
// successfully. It carries the summary text together with before/after
// statistics so the receiver can show what the compaction achieved.
type CompactCompleteEvent struct {
	// Summary is the LLM-generated structured summary of the compacted
	// messages.
	Summary string
	// OriginalTokens is the estimated token count before compaction.
	OriginalTokens int
	// CompactedTokens is the estimated token count after compaction.
	CompactedTokens int
	// MessagesRemoved is the number of messages that were summarised away.
	MessagesRemoved int
}
|
||||
|
||||
// CompactErrorEvent is sent when a /compact operation fails, including the
// case where there was nothing to compact.
type CompactErrorEvent struct {
	// Err is the error that caused compaction to fail.
	Err error
}
|
||||
|
||||
// ExtensionPrintEvent is sent when an extension calls ctx.Print, ctx.PrintInfo,
|
||||
// ctx.PrintError, or ctx.PrintBlock. The TUI renders it via the appropriate
|
||||
// renderer and tea.Println (scrollback); the CLI handler uses
|
||||
|
||||
@@ -1,5 +1,10 @@
|
||||
// Package compaction provides context window management with token estimation,
|
||||
// compaction triggers, and LLM-based conversation summarization.
|
||||
//
|
||||
// The algorithm mirrors Pi's approach: preserve a token budget of recent
|
||||
// messages (KeepRecentTokens, default 20 000) rather than a fixed message
|
||||
// count. Auto-compaction fires when estimated context usage exceeds
|
||||
// contextWindow − ReserveTokens.
|
||||
package compaction
|
||||
|
||||
import (
|
||||
@@ -10,36 +15,55 @@ import (
|
||||
"charm.land/fantasy"
|
||||
)
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Token estimation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// EstimateTokens provides a rough token count for text using the heuristic of
// roughly four bytes per token.
//
// The length is measured in bytes (len), not runes, so multi-byte UTF-8 text
// counts more than its visible character count. The division truncates:
// inputs shorter than four bytes, including the empty string, estimate to 0.
func EstimateTokens(text string) int {
	const bytesPerToken = 4
	return len(text) / bytesPerToken
}
|
||||
|
||||
// EstimateMessageTokens estimates total tokens across a slice of fantasy messages
|
||||
// by summing the estimated tokens for every text part.
|
||||
// EstimateMessageTokens estimates total tokens across a slice of fantasy
|
||||
// messages by summing the estimated tokens for every text part.
|
||||
func EstimateMessageTokens(messages []fantasy.Message) int {
|
||||
total := 0
|
||||
for _, msg := range messages {
|
||||
for _, part := range msg.Content {
|
||||
if tp, ok := part.(fantasy.TextPart); ok {
|
||||
total += EstimateTokens(tp.Text)
|
||||
}
|
||||
total += estimateSingleMessageTokens(msg)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// estimateSingleMessageTokens returns the estimated token count for one
|
||||
// message.
|
||||
func estimateSingleMessageTokens(msg fantasy.Message) int {
|
||||
total := 0
|
||||
for _, part := range msg.Content {
|
||||
if tp, ok := part.(fantasy.TextPart); ok {
|
||||
total += EstimateTokens(tp.Text)
|
||||
}
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// ShouldCompact reports whether the conversation exceeds the threshold
|
||||
// percentage of the context limit. thresholdPct should be in the range 0.0–1.0
|
||||
// (e.g. 0.8 means 80%).
|
||||
func ShouldCompact(messages []fantasy.Message, contextLimit int, thresholdPct float64) bool {
|
||||
if contextLimit <= 0 || thresholdPct <= 0 {
|
||||
// ---------------------------------------------------------------------------
|
||||
// Auto-compact trigger
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ShouldCompact reports whether auto-compaction should fire. It uses Pi's
|
||||
// formula: contextTokens > contextWindow − reserveTokens.
|
||||
func ShouldCompact(messages []fantasy.Message, contextWindow int, reserveTokens int) bool {
|
||||
if contextWindow <= 0 || reserveTokens <= 0 {
|
||||
return false
|
||||
}
|
||||
estimated := EstimateMessageTokens(messages)
|
||||
return float64(estimated) >= float64(contextLimit)*thresholdPct
|
||||
return estimated > contextWindow-reserveTokens
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Options & defaults
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// CompactionResult contains statistics from a compaction operation.
|
||||
type CompactionResult struct {
|
||||
Summary string // LLM-generated summary of compacted messages
|
||||
@@ -48,74 +72,170 @@ type CompactionResult struct {
|
||||
MessagesRemoved int // Number of messages replaced by the summary
|
||||
}
|
||||
|
||||
// CompactionOptions configures compaction behaviour.
|
||||
// CompactionOptions configures compaction behaviour. Pi-style token-based
|
||||
// defaults are applied for zero-value fields.
|
||||
type CompactionOptions struct {
|
||||
ContextLimit int // Model's context window size (tokens)
|
||||
ThresholdPct float64 // Trigger threshold (0.0–1.0), default 0.8
|
||||
PreserveRecent int // Number of recent messages to keep, default 10
|
||||
SummaryPrompt string // Custom summary prompt (empty = use default)
|
||||
ContextWindow int // Model's context window size (tokens)
|
||||
ReserveTokens int // Tokens to reserve for LLM response, default 16384
|
||||
KeepRecentTokens int // Recent tokens to preserve (not summarised), default 20000
|
||||
SummaryPrompt string // Custom summary prompt (empty = use default)
|
||||
}
|
||||
|
||||
// defaults fills zero-value fields with sensible defaults.
|
||||
// defaults fills zero-value fields with sensible Pi-style defaults.
|
||||
func (o *CompactionOptions) defaults() {
|
||||
if o.ThresholdPct <= 0 {
|
||||
o.ThresholdPct = 0.8
|
||||
if o.ReserveTokens <= 0 {
|
||||
o.ReserveTokens = 16384
|
||||
}
|
||||
if o.PreserveRecent <= 0 {
|
||||
o.PreserveRecent = 10
|
||||
if o.KeepRecentTokens <= 0 {
|
||||
o.KeepRecentTokens = 20000
|
||||
}
|
||||
}
|
||||
|
||||
// defaultSummaryPrompt is the system prompt used to summarise older messages.
|
||||
const defaultSummaryPrompt = `You are a conversation summarizer. Summarize the following conversation messages into a concise summary that preserves:
|
||||
1. Key decisions and conclusions reached
|
||||
2. Important context and facts established
|
||||
3. Current task state and progress
|
||||
4. Any pending actions or open questions
|
||||
// defaultSystemPrompt is the system prompt sent to the summarisation LLM.
|
||||
// Matches Pi's compaction system prompt.
|
||||
const defaultSystemPrompt = `You are a context summarization assistant. Your task is to read a conversation between a user and an AI coding assistant, then produce a structured summary following the exact format specified.
|
||||
|
||||
Be concise but thorough. Output only the summary text, no preamble.`
|
||||
Do NOT continue the conversation. Do NOT respond to any questions in the conversation. ONLY output the structured summary.`
|
||||
|
||||
// FindCutPoint determines the index at which to cut messages for compaction.
|
||||
// Messages before the cut point will be summarised; messages from the cut
|
||||
// point onward are preserved. Returns 0 if no compaction is needed.
|
||||
func FindCutPoint(messages []fantasy.Message, preserveRecent int) int {
|
||||
if preserveRecent <= 0 {
|
||||
preserveRecent = 10
|
||||
}
|
||||
if len(messages) <= preserveRecent {
|
||||
return 0 // not enough messages to compact
|
||||
}
|
||||
return len(messages) - preserveRecent
|
||||
// defaultSummaryPrompt is the user prompt appended after the serialised
|
||||
// conversation. Matches Pi's initial-compaction format.
|
||||
const defaultSummaryPrompt = `The messages above are a conversation to summarize. Create a structured context checkpoint summary that another LLM will use to continue the work.
|
||||
|
||||
Use this EXACT format:
|
||||
|
||||
## Goal
|
||||
[What is the user trying to accomplish? Can be multiple items if the session covers different tasks.]
|
||||
|
||||
## Constraints & Preferences
|
||||
- [Any constraints, preferences, or requirements mentioned by user]
|
||||
- [Or "(none)" if none were mentioned]
|
||||
|
||||
## Progress
|
||||
### Done
|
||||
- [x] [Completed tasks/changes]
|
||||
|
||||
### In Progress
|
||||
- [ ] [Current work]
|
||||
|
||||
### Blocked
|
||||
- [Issues preventing progress, if any]
|
||||
|
||||
## Key Decisions
|
||||
- **[Decision]**: [Brief rationale]
|
||||
|
||||
## Next Steps
|
||||
1. [Ordered list of what should happen next]
|
||||
|
||||
## Critical Context
|
||||
- [Any data, examples, or references needed to continue]
|
||||
- [Or "(none)" if not applicable]
|
||||
|
||||
Keep each section concise. Preserve exact file paths, function names, and error messages.`
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Cut point (token-based, Pi-style)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// isValidCutPoint returns true if the message at index i is a valid place to
|
||||
// split the conversation. Tool-role messages (tool results) must stay with
|
||||
// their preceding assistant tool-call, so they are never valid cut points.
|
||||
func isValidCutPoint(msg fantasy.Message) bool {
|
||||
return msg.Role != fantasy.MessageRoleTool
|
||||
}
|
||||
|
||||
// Compact summarises older messages using the LLM, returning the compaction
|
||||
// result and a new message slice (summary message + preserved recent messages).
|
||||
// FindCutPoint walks backward from the end of messages, accumulating tokens
|
||||
// until the keepRecentTokens budget is filled. Returns the index that
|
||||
// separates "old" messages (0..cutPoint-1, to be summarised) from "recent"
|
||||
// messages (cutPoint..end, to be preserved).
|
||||
//
|
||||
// The model parameter is the same fantasy.LanguageModel used for regular
|
||||
// generation — compaction creates a disposable fantasy agent with no tools to
|
||||
// produce the summary.
|
||||
func Compact(
|
||||
ctx context.Context,
|
||||
model fantasy.LanguageModel,
|
||||
messages []fantasy.Message,
|
||||
opts CompactionOptions,
|
||||
) (*CompactionResult, []fantasy.Message, error) {
|
||||
opts.defaults()
|
||||
|
||||
cutPoint := FindCutPoint(messages, opts.PreserveRecent)
|
||||
if cutPoint == 0 {
|
||||
return nil, messages, nil // nothing to compact
|
||||
// Returns 0 if there are fewer than 2 messages or all messages fit within
|
||||
// the keep budget.
|
||||
func FindCutPoint(messages []fantasy.Message, keepRecentTokens int) int {
|
||||
if len(messages) < 2 {
|
||||
return 0
|
||||
}
|
||||
if keepRecentTokens <= 0 {
|
||||
keepRecentTokens = 20000
|
||||
}
|
||||
|
||||
oldMessages := messages[:cutPoint]
|
||||
recentMessages := messages[cutPoint:]
|
||||
originalTokens := EstimateMessageTokens(messages)
|
||||
accumulated := 0
|
||||
|
||||
// Build a textual representation of the messages to summarise.
|
||||
for i := len(messages) - 1; i >= 0; i-- {
|
||||
accumulated += estimateSingleMessageTokens(messages[i])
|
||||
if accumulated > keepRecentTokens {
|
||||
cut := i + 1
|
||||
|
||||
// If the last message alone exceeds the budget, keep it
|
||||
// anyway and summarise everything before it.
|
||||
if cut >= len(messages) {
|
||||
cut = len(messages) - 1
|
||||
}
|
||||
|
||||
// Land on a valid cut point — scan forward past tool-result
|
||||
// messages (they must stay with their preceding tool call).
|
||||
for cut < len(messages) && !isValidCutPoint(messages[cut]) {
|
||||
cut++
|
||||
}
|
||||
if cut >= len(messages) {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Need at least 2 messages before the cut to produce a
|
||||
// meaningful summary.
|
||||
if cut < 2 {
|
||||
return 0
|
||||
}
|
||||
return cut
|
||||
}
|
||||
}
|
||||
|
||||
// All messages fit within the budget — nothing to compact.
|
||||
return 0
|
||||
}
|
||||
|
||||
// forceCutPoint returns a cut point that keeps only the last non-tool
|
||||
// message, summarising everything before it. Used when the budget-based
|
||||
// FindCutPoint returns 0 but the caller wants to compact anyway (manual
|
||||
// /compact). Returns 0 if no valid cut exists.
|
||||
func forceCutPoint(messages []fantasy.Message) int {
|
||||
// Walk backward to find the last valid (non-tool) message boundary.
|
||||
for i := len(messages) - 1; i >= 2; i-- {
|
||||
if isValidCutPoint(messages[i]) {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Message serialisation (Pi-style)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// roleLabel returns a human-readable label for a fantasy message role,
|
||||
// matching Pi's serialisation format.
|
||||
func roleLabel(role fantasy.MessageRole) string {
|
||||
switch role {
|
||||
case fantasy.MessageRoleUser:
|
||||
return "[User]"
|
||||
case fantasy.MessageRoleAssistant:
|
||||
return "[Assistant]"
|
||||
case fantasy.MessageRoleTool:
|
||||
return "[Tool result]"
|
||||
case fantasy.MessageRoleSystem:
|
||||
return "[System]"
|
||||
default:
|
||||
return "[" + string(role) + "]"
|
||||
}
|
||||
}
|
||||
|
||||
// serializeMessages converts a slice of fantasy messages into a plain-text
|
||||
// representation suitable for sending to the summarisation LLM. The format
|
||||
// mirrors Pi's compaction serialisation.
|
||||
func serializeMessages(messages []fantasy.Message) string {
|
||||
var sb strings.Builder
|
||||
for _, msg := range oldMessages {
|
||||
sb.WriteString(string(msg.Role))
|
||||
sb.WriteString(": ")
|
||||
for _, msg := range messages {
|
||||
sb.WriteString(roleLabel(msg.Role))
|
||||
sb.WriteString(":\n")
|
||||
for _, part := range msg.Content {
|
||||
if tp, ok := part.(fantasy.TextPart); ok {
|
||||
sb.WriteString(tp.Text)
|
||||
@@ -123,20 +243,70 @@ func Compact(
|
||||
}
|
||||
sb.WriteString("\n\n")
|
||||
}
|
||||
conversationText := sb.String()
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
// Use the provided (or default) summary prompt.
|
||||
summaryPrompt := opts.SummaryPrompt
|
||||
if summaryPrompt == "" {
|
||||
summaryPrompt = defaultSummaryPrompt
|
||||
// ---------------------------------------------------------------------------
|
||||
// Compact
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Compact summarises older messages using the LLM, returning the compaction
|
||||
// result and a new message slice (summary message + preserved recent
|
||||
// messages).
|
||||
//
|
||||
// The model parameter is the same fantasy.LanguageModel used for regular
|
||||
// generation — compaction creates a disposable fantasy agent with no tools to
|
||||
// produce the summary.
|
||||
//
|
||||
// customInstructions is optional text appended to the summary prompt (e.g.
|
||||
// "Focus on the API design decisions"). Pass "" to use the default prompt
|
||||
// only.
|
||||
func Compact(
|
||||
ctx context.Context,
|
||||
model fantasy.LanguageModel,
|
||||
messages []fantasy.Message,
|
||||
opts CompactionOptions,
|
||||
customInstructions string,
|
||||
) (*CompactionResult, []fantasy.Message, error) {
|
||||
opts.defaults()
|
||||
|
||||
if len(messages) < 2 {
|
||||
return nil, messages, nil
|
||||
}
|
||||
|
||||
cutPoint := FindCutPoint(messages, opts.KeepRecentTokens)
|
||||
if cutPoint == 0 {
|
||||
// All messages fit within the keep budget. Force a cut that
|
||||
// keeps only the last non-tool message — matching Pi, which
|
||||
// always compacts when the user explicitly requests it.
|
||||
cutPoint = forceCutPoint(messages)
|
||||
if cutPoint == 0 {
|
||||
return nil, messages, nil
|
||||
}
|
||||
}
|
||||
|
||||
oldMessages := messages[:cutPoint]
|
||||
recentMessages := messages[cutPoint:]
|
||||
originalTokens := EstimateMessageTokens(messages)
|
||||
|
||||
// Serialise old messages to text, matching Pi's format.
|
||||
conversationText := serializeMessages(oldMessages)
|
||||
|
||||
// Build the user-facing prompt: conversation text + summary instructions.
|
||||
userPrompt := opts.SummaryPrompt
|
||||
if userPrompt == "" {
|
||||
userPrompt = defaultSummaryPrompt
|
||||
}
|
||||
if customInstructions != "" {
|
||||
userPrompt += "\n\nAdditional instructions: " + customInstructions
|
||||
}
|
||||
|
||||
// Create a lightweight agent (no tools) just for summarisation.
|
||||
summaryAgent := fantasy.NewAgent(model,
|
||||
fantasy.WithSystemPrompt(summaryPrompt),
|
||||
fantasy.WithSystemPrompt(defaultSystemPrompt),
|
||||
)
|
||||
result, err := summaryAgent.Generate(ctx, fantasy.AgentCall{
|
||||
Prompt: conversationText,
|
||||
Prompt: conversationText + "\n\n" + userPrompt,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("compaction summarisation failed: %w", err)
|
||||
|
||||
@@ -2,6 +2,7 @@ package compaction
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"charm.land/fantasy"
|
||||
@@ -14,6 +15,16 @@ func makeTextMessage(role fantasy.MessageRole, text string) fantasy.Message {
|
||||
}
|
||||
}
|
||||
|
||||
// makeTextMessageN creates a message whose text is exactly n characters long
|
||||
// (≈ n/4 estimated tokens).
|
||||
func makeTextMessageN(role fantasy.MessageRole, n int) fantasy.Message {
|
||||
return makeTextMessage(role, strings.Repeat("a", n))
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Token estimation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestEstimateTokens(t *testing.T) {
|
||||
tests := []struct {
|
||||
text string
|
||||
@@ -51,29 +62,30 @@ func TestEstimateMessageTokens_Empty(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ShouldCompact (Pi-style: contextTokens > contextWindow - reserveTokens)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestShouldCompact(t *testing.T) {
|
||||
// Create messages that total ~100 tokens (400 chars).
|
||||
longText := make([]byte, 400)
|
||||
for i := range longText {
|
||||
longText[i] = 'a'
|
||||
}
|
||||
msgs := []fantasy.Message{makeTextMessage(fantasy.MessageRoleUser, string(longText))}
|
||||
msgs := []fantasy.Message{makeTextMessageN(fantasy.MessageRoleUser, 400)}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
contextLimit int
|
||||
threshold float64
|
||||
want bool
|
||||
name string
|
||||
contextWindow int
|
||||
reserveTokens int
|
||||
want bool
|
||||
}{
|
||||
{"above threshold", 120, 0.8, true}, // 100 >= 120*0.8=96
|
||||
{"below threshold", 200, 0.8, false}, // 100 < 200*0.8=160
|
||||
{"zero limit", 0, 0.8, false}, // no limit
|
||||
{"zero threshold", 200, 0.0, false}, // no threshold
|
||||
{"exactly at threshold", 125, 0.8, true}, // 100 >= 125*0.8=100
|
||||
{"above threshold", 110, 16, true}, // 100 > 110-16=94 → true
|
||||
{"below threshold", 200, 16, false}, // 100 > 200-16=184 → false
|
||||
{"zero window", 0, 16, false}, // no window
|
||||
{"zero reserve", 200, 0, false}, // no reserve
|
||||
{"exactly at threshold", 116, 16, false}, // 100 > 116-16=100 → false (not >)
|
||||
{"one over", 115, 16, true}, // 100 > 115-16=99 → true
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := ShouldCompact(msgs, tt.contextLimit, tt.threshold)
|
||||
got := ShouldCompact(msgs, tt.contextWindow, tt.reserveTokens)
|
||||
if got != tt.want {
|
||||
t.Errorf("ShouldCompact() = %v, want %v", got, tt.want)
|
||||
}
|
||||
@@ -81,26 +93,41 @@ func TestShouldCompact(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindCutPoint(t *testing.T) {
|
||||
msgs := make([]fantasy.Message, 20)
|
||||
// ---------------------------------------------------------------------------
|
||||
// FindCutPoint (token-based, Pi-style)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestFindCutPoint_TokenBased(t *testing.T) {
|
||||
// Each message is 400 chars = ~100 tokens.
|
||||
msgs := make([]fantasy.Message, 10)
|
||||
for i := range msgs {
|
||||
msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "msg")
|
||||
if i%2 == 0 {
|
||||
msgs[i] = makeTextMessageN(fantasy.MessageRoleUser, 400)
|
||||
} else {
|
||||
msgs[i] = makeTextMessageN(fantasy.MessageRoleAssistant, 400)
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
preserveRecent int
|
||||
want int
|
||||
name string
|
||||
keepRecentTokens int
|
||||
want int // expected cut point
|
||||
}{
|
||||
{"preserve 10", 10, 10},
|
||||
{"preserve 5", 5, 15},
|
||||
{"preserve all", 20, 0},
|
||||
{"preserve more than total", 25, 0},
|
||||
{"preserve 0 uses default 10", 0, 10},
|
||||
// keepRecentTokens=250 → walk back: msg[9]=100, msg[8]=200 ≤ 250,
|
||||
// msg[7]=300 > 250 → cut = 8.
|
||||
{"keep 250 tokens", 250, 8},
|
||||
// keepRecentTokens=500 → walk back 5 msgs = 500 ≤ 500,
|
||||
// 6th msg = 600 > 500 → cut = 5.
|
||||
{"keep 500 tokens", 500, 5},
|
||||
// keepRecentTokens=1000 → all 10 msgs = 1000, not exceeded → cut = 0.
|
||||
{"keep all", 1000, 0},
|
||||
// keepRecentTokens=50 → msg[9] alone = 100 > 50 → cut = 10,
|
||||
// exceeds len → clamped to 9. 9 ≥ 2 → valid.
|
||||
{"keep very few", 50, 9},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := FindCutPoint(msgs, tt.preserveRecent)
|
||||
got := FindCutPoint(msgs, tt.keepRecentTokens)
|
||||
if got != tt.want {
|
||||
t.Errorf("FindCutPoint() = %d, want %d", got, tt.want)
|
||||
}
|
||||
@@ -108,38 +135,115 @@ func TestFindCutPoint(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindCutPoint_TooFewMessages(t *testing.T) {
|
||||
msgs := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
}
|
||||
got := FindCutPoint(msgs, 50)
|
||||
if got != 0 {
|
||||
t.Errorf("FindCutPoint(1 msg) = %d, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFindCutPoint_SkipsToolResults(t *testing.T) {
|
||||
// [user, assistant, tool, user, assistant]
|
||||
// Each 400 chars = 100 tokens. keepRecentTokens=150 → walk back:
|
||||
// msg[4] (assistant) = 100 ≤ 150
|
||||
// msg[3] (user) = 200 > 150 → raw cut at 4, but check validity.
|
||||
// msg[4] is assistant → valid cut point. Cut = 4.
|
||||
msgs := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleTool, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
}
|
||||
got := FindCutPoint(msgs, 150)
|
||||
if got != 4 {
|
||||
t.Errorf("FindCutPoint() = %d, want 4", got)
|
||||
}
|
||||
|
||||
// Now test where the raw cut lands on a tool result.
|
||||
// [user, assistant, tool, tool, user]
|
||||
// keepRecentTokens=50 → walk back: msg[4]=100 > 50 → raw cut at 5? No,
|
||||
// i=4, accumulated=100 > 50, cut = i+1 = 5 → that's len(msgs), so no
|
||||
// valid split. Actually let me think again...
|
||||
// i starts at 4 (last), accumulated += 100 = 100 > 50 → cut = 5.
|
||||
// cut=5 >= len(msgs)=5 → return 0. Correct.
|
||||
|
||||
// Try keepRecentTokens=150 → walk back:
|
||||
// msg[4] (user) = 100 ≤ 150
|
||||
// msg[3] (tool) = 200 > 150 → cut at 4, msg[4] is user → valid.
|
||||
msgs2 := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleTool, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleTool, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
}
|
||||
got2 := FindCutPoint(msgs2, 150)
|
||||
if got2 != 4 {
|
||||
t.Errorf("FindCutPoint(tool results) = %d, want 4", got2)
|
||||
}
|
||||
|
||||
// Where raw cut lands ON a tool message and must scan forward.
|
||||
// [user(0), assistant(1), tool(2), tool(3), user(4), assistant(5)]
|
||||
// keepRecentTokens=250 → walk back:
|
||||
// msg[5] = 100 ≤ 250
|
||||
// msg[4] = 200 ≤ 250
|
||||
// msg[3] = 300 > 250 → cut at 4, msg[4] is user → valid.
|
||||
msgs3 := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleTool, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleTool, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
}
|
||||
got3 := FindCutPoint(msgs3, 250)
|
||||
if got3 != 4 {
|
||||
t.Errorf("FindCutPoint(scan forward) = %d, want 4", got3)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CompactionOptions defaults
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestCompactionOptions_Defaults(t *testing.T) {
|
||||
opts := CompactionOptions{}
|
||||
opts.defaults()
|
||||
|
||||
if opts.ThresholdPct != 0.8 {
|
||||
t.Errorf("ThresholdPct = %f, want 0.8", opts.ThresholdPct)
|
||||
if opts.ReserveTokens != 16384 {
|
||||
t.Errorf("ReserveTokens = %d, want 16384", opts.ReserveTokens)
|
||||
}
|
||||
if opts.PreserveRecent != 10 {
|
||||
t.Errorf("PreserveRecent = %d, want 10", opts.PreserveRecent)
|
||||
if opts.KeepRecentTokens != 20000 {
|
||||
t.Errorf("KeepRecentTokens = %d, want 20000", opts.KeepRecentTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompactionOptions_DefaultsPreservesExisting(t *testing.T) {
|
||||
opts := CompactionOptions{ThresholdPct: 0.9, PreserveRecent: 5}
|
||||
opts := CompactionOptions{ReserveTokens: 8192, KeepRecentTokens: 10000}
|
||||
opts.defaults()
|
||||
|
||||
if opts.ThresholdPct != 0.9 {
|
||||
t.Errorf("ThresholdPct = %f, want 0.9", opts.ThresholdPct)
|
||||
if opts.ReserveTokens != 8192 {
|
||||
t.Errorf("ReserveTokens = %d, want 8192", opts.ReserveTokens)
|
||||
}
|
||||
if opts.PreserveRecent != 5 {
|
||||
t.Errorf("PreserveRecent = %d, want 5", opts.PreserveRecent)
|
||||
if opts.KeepRecentTokens != 10000 {
|
||||
t.Errorf("KeepRecentTokens = %d, want 10000", opts.KeepRecentTokens)
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Compact (integration — too few messages)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestCompact_TooFewMessages(t *testing.T) {
|
||||
msgs := make([]fantasy.Message, 5)
|
||||
for i := range msgs {
|
||||
msgs[i] = makeTextMessage(fantasy.MessageRoleUser, "short")
|
||||
msgs := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
}
|
||||
|
||||
// Default preserveRecent = 10, so 5 messages is too few.
|
||||
result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{})
|
||||
result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
@@ -150,3 +254,22 @@ func TestCompact_TooFewMessages(t *testing.T) {
|
||||
t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs))
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompact_WithinBudget(t *testing.T) {
|
||||
// 2 messages, each 100 tokens, keepRecentTokens=20000 → all fit.
|
||||
msgs := []fantasy.Message{
|
||||
makeTextMessageN(fantasy.MessageRoleUser, 400),
|
||||
makeTextMessageN(fantasy.MessageRoleAssistant, 400),
|
||||
}
|
||||
|
||||
result, newMsgs, err := Compact(context.TODO(), nil, msgs, CompactionOptions{}, "")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if result != nil {
|
||||
t.Error("expected nil result when all messages fit within budget")
|
||||
}
|
||||
if len(newMsgs) != len(msgs) {
|
||||
t.Errorf("messages changed: got %d, want %d", len(newMsgs), len(msgs))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -59,6 +59,12 @@ var SlashCommands = []SlashCommand{
|
||||
Category: "System",
|
||||
Aliases: []string{"/cq"},
|
||||
},
|
||||
{
|
||||
Name: "/compact",
|
||||
Description: "Summarise older messages to free context space",
|
||||
Category: "System",
|
||||
Aliases: []string{"/co"},
|
||||
},
|
||||
{
|
||||
Name: "/quit",
|
||||
Description: "Exit the application",
|
||||
|
||||
@@ -50,6 +50,12 @@ type AppController interface {
|
||||
ClearQueue()
|
||||
// ClearMessages clears the conversation history.
|
||||
ClearMessages()
|
||||
// CompactConversation summarises older messages to free context space.
|
||||
// Runs asynchronously; results are delivered via CompactCompleteEvent or
|
||||
// CompactErrorEvent sent through the registered tea.Program. Returns an
|
||||
// error synchronously if compaction cannot be started (e.g. agent is busy).
|
||||
// customInstructions is optional text appended to the summary prompt.
|
||||
CompactConversation(customInstructions string) error
|
||||
// GetTreeSession returns the tree session manager, or nil if tree sessions
|
||||
// are not enabled. Used by slash commands like /tree, /fork, /session.
|
||||
GetTreeSession() *session.TreeManager
|
||||
@@ -497,6 +503,17 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
return m, tea.Batch(cmds...)
|
||||
}
|
||||
|
||||
// /compact supports optional args: "/compact Focus on API decisions".
|
||||
// GetCommandByName won't match the full text, so check the prefix.
|
||||
if name, args, ok := strings.Cut(msg.Text, " "); ok {
|
||||
if sc := GetCommandByName(name); sc != nil && sc.Name == "/compact" {
|
||||
if cmd := m.handleCompactCommand(strings.TrimSpace(args)); cmd != nil {
|
||||
cmds = append(cmds, cmd)
|
||||
}
|
||||
return m, tea.Batch(cmds...)
|
||||
}
|
||||
}
|
||||
|
||||
// Check extension-registered slash commands. These support arguments
|
||||
// (e.g. "/sub list files"), so we split on the first space.
|
||||
if cmd := m.handleExtensionCommand(msg.Text); cmd != nil {
|
||||
@@ -636,6 +653,20 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.state = stateInput
|
||||
m.canceling = false
|
||||
|
||||
case app.CompactCompleteEvent:
|
||||
if m.stream != nil {
|
||||
m.stream.Reset()
|
||||
}
|
||||
m.state = stateInput
|
||||
cmds = append(cmds, m.printCompactResult(msg))
|
||||
|
||||
case app.CompactErrorEvent:
|
||||
if m.stream != nil {
|
||||
m.stream.Reset()
|
||||
}
|
||||
m.state = stateInput
|
||||
cmds = append(cmds, m.printSystemMessage(fmt.Sprintf("Compaction failed: %v", msg.Err)))
|
||||
|
||||
case app.ExtensionPrintEvent:
|
||||
// Extension output — route through styled renderers when a level is set.
|
||||
switch msg.Level {
|
||||
@@ -870,6 +901,8 @@ func (m *AppModel) handleSlashCommand(sc *SlashCommand) tea.Cmd {
|
||||
return m.printUsageMessage()
|
||||
case "/reset-usage":
|
||||
return m.printResetUsage()
|
||||
case "/compact":
|
||||
return m.handleCompactCommand("")
|
||||
case "/clear":
|
||||
if m.appCtrl != nil {
|
||||
m.appCtrl.ClearMessages()
|
||||
@@ -987,6 +1020,7 @@ func (m *AppModel) printHelpMessage() tea.Cmd {
|
||||
"- `/fork`: Branch from an earlier message\n" +
|
||||
"- `/new`: Start a new branch (preserves history)\n\n" +
|
||||
"**System:**\n" +
|
||||
"- `/compact [instructions]`: Summarise older messages to free context space\n" +
|
||||
"- `/clear`: Clear message history\n" +
|
||||
"- `/reset-usage`: Reset usage statistics\n" +
|
||||
"- `/quit`: Exit the application\n\n"
|
||||
@@ -1080,6 +1114,54 @@ func (m *AppModel) printResetUsage() tea.Cmd {
|
||||
return m.printSystemMessage("Usage statistics have been reset.")
|
||||
}
|
||||
|
||||
// handleCompactCommand starts an async compaction. It returns a tea.Cmd that
|
||||
// prints a "compacting..." message and transitions to the working state. If
|
||||
// the app controller rejects the request (busy, closed) it prints an error
|
||||
// instead. customInstructions is optional text appended to the summary
|
||||
// prompt (e.g. "Focus on the API design decisions").
|
||||
func (m *AppModel) handleCompactCommand(customInstructions string) tea.Cmd {
|
||||
if m.appCtrl == nil {
|
||||
return m.printSystemMessage("Compaction is not available.")
|
||||
}
|
||||
if err := m.appCtrl.CompactConversation(customInstructions); err != nil {
|
||||
return m.printSystemMessage(fmt.Sprintf("Cannot compact: %v", err))
|
||||
}
|
||||
// Transition to working state so the spinner shows while compaction runs.
|
||||
m.state = stateWorking
|
||||
var spinnerCmd tea.Cmd
|
||||
if m.stream != nil {
|
||||
_, spinnerCmd = m.stream.Update(app.SpinnerEvent{Show: true})
|
||||
}
|
||||
return tea.Batch(m.printSystemMessage("Compacting conversation..."), spinnerCmd)
|
||||
}
|
||||
|
||||
// printCompactResult renders the compaction summary in a styled block with
|
||||
// a distinct border color and a stats subtitle.
|
||||
func (m *AppModel) printCompactResult(evt app.CompactCompleteEvent) tea.Cmd {
|
||||
theme := GetTheme()
|
||||
|
||||
saved := evt.OriginalTokens - evt.CompactedTokens
|
||||
subtitle := fmt.Sprintf(
|
||||
"%d messages summarised, ~%dk tokens freed (%dk -> %dk)",
|
||||
evt.MessagesRemoved, saved/1000, evt.OriginalTokens/1000, evt.CompactedTokens/1000,
|
||||
)
|
||||
|
||||
content := evt.Summary
|
||||
if subtitle != "" {
|
||||
sub := lipgloss.NewStyle().Foreground(theme.VeryMuted).Render(" " + subtitle)
|
||||
content = strings.TrimSuffix(content, "\n") + "\n\n" + sub
|
||||
}
|
||||
|
||||
rendered := renderContentBlock(
|
||||
content,
|
||||
m.width,
|
||||
WithAlign(lipgloss.Left),
|
||||
WithBorderColor(theme.Secondary),
|
||||
WithMarginBottom(1),
|
||||
)
|
||||
return tea.Println(rendered)
|
||||
}
|
||||
|
||||
// flushStreamContent gets the rendered content from the stream component,
|
||||
// emits it above the BT region via tea.Println, and resets the stream. This
|
||||
// is called before printing tool calls (streaming completes before tools fire)
|
||||
|
||||
@@ -45,6 +45,10 @@ func (s *stubAppController) ClearMessages() {
|
||||
s.clearMsgCalled++
|
||||
}
|
||||
|
||||
func (s *stubAppController) CompactConversation(_ string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *stubAppController) GetTreeSession() *session.TreeManager {
|
||||
return nil
|
||||
}
|
||||
|
||||
+22
-19
@@ -23,21 +23,22 @@ func (m *Kit) EstimateContextTokens() int {
|
||||
}
|
||||
|
||||
// ShouldCompact reports whether the conversation is near the model's context
|
||||
// limit and should be compacted. Returns false if the model's context limit
|
||||
// is unknown or if no compaction options are configured.
|
||||
// limit and should be compacted. Uses Pi's formula:
|
||||
// contextTokens > contextWindow − reserveTokens.
|
||||
// Returns false if the model's context limit is unknown.
|
||||
func (m *Kit) ShouldCompact() bool {
|
||||
info := m.GetModelInfo()
|
||||
if info == nil || info.Limit.Context <= 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
threshold := 0.8
|
||||
if m.compactionOpts != nil && m.compactionOpts.ThresholdPct > 0 {
|
||||
threshold = m.compactionOpts.ThresholdPct
|
||||
reserveTokens := 16384
|
||||
if m.compactionOpts != nil && m.compactionOpts.ReserveTokens > 0 {
|
||||
reserveTokens = m.compactionOpts.ReserveTokens
|
||||
}
|
||||
|
||||
messages := m.treeSession.GetFantasyMessages()
|
||||
return compaction.ShouldCompact(messages, info.Limit.Context, threshold)
|
||||
return compaction.ShouldCompact(messages, info.Limit.Context, reserveTokens)
|
||||
}
|
||||
|
||||
// GetContextStats returns current context usage statistics including
|
||||
@@ -61,13 +62,16 @@ func (m *Kit) GetContextStats() ContextStats {
|
||||
}
|
||||
|
||||
// Compact summarises older messages to reduce context usage. If opts is nil,
|
||||
// the instance's CompactionOptions (or sensible defaults) are used. The model's
|
||||
// context limit is automatically populated from the model registry when
|
||||
// opts.ContextLimit is 0.
|
||||
// the instance's CompactionOptions (or sensible defaults) are used. The
|
||||
// model's context window is automatically populated from the model registry
|
||||
// when opts.ContextWindow is 0.
|
||||
//
|
||||
// After compaction, the tree session is cleared and replaced with the compacted
|
||||
// messages (summary + preserved recent messages).
|
||||
func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*CompactionResult, error) {
|
||||
// customInstructions is optional text appended to the summary prompt (e.g.
|
||||
// "Focus on the API design decisions"). Pass "" for the default prompt.
|
||||
//
|
||||
// After compaction, the tree session is cleared and replaced with the
|
||||
// compacted messages (summary + preserved recent messages).
|
||||
func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions, customInstructions string) (*CompactionResult, error) {
|
||||
if opts == nil {
|
||||
if m.compactionOpts != nil {
|
||||
opts = m.compactionOpts
|
||||
@@ -76,25 +80,24 @@ func (m *Kit) Compact(ctx context.Context, opts *CompactionOptions) (*Compaction
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-populate context limit from model info if not set.
|
||||
if opts.ContextLimit <= 0 {
|
||||
// Auto-populate context window from model info if not set.
|
||||
if opts.ContextWindow <= 0 {
|
||||
if info := m.GetModelInfo(); info != nil {
|
||||
opts.ContextLimit = info.Limit.Context
|
||||
opts.ContextWindow = info.Limit.Context
|
||||
}
|
||||
}
|
||||
|
||||
messages := m.treeSession.GetFantasyMessages()
|
||||
if len(messages) == 0 {
|
||||
return nil, fmt.Errorf("cannot compact: no messages in session")
|
||||
if len(messages) < 2 {
|
||||
return nil, fmt.Errorf("cannot compact: need at least 2 messages")
|
||||
}
|
||||
|
||||
model := m.agent.GetModel()
|
||||
result, newMessages, err := compaction.Compact(ctx, model, messages, *opts)
|
||||
result, newMessages, err := compaction.Compact(ctx, model, messages, *opts, customInstructions)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if result == nil {
|
||||
// Nothing to compact (too few messages).
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -540,7 +540,7 @@ func (m *Kit) runTurn(ctx context.Context, promptLabel string, prompt string, pr
|
||||
|
||||
// Auto-compact if enabled and conversation is near the context limit.
|
||||
if m.autoCompact && m.ShouldCompact() {
|
||||
_, _ = m.Compact(ctx, m.compactionOpts) // best-effort
|
||||
_, _ = m.Compact(ctx, m.compactionOpts, "") // best-effort
|
||||
}
|
||||
|
||||
// Build context from the tree so only the current branch is sent.
|
||||
|
||||
Reference in New Issue
Block a user