Files
Ed Zynda 2de98d32be fix(ui): accurate context token tracking including cache tokens
- Include all token categories in context fill calculation:
  InputTokens + CacheReadTokens + CacheCreationTokens + OutputTokens
- With Anthropic/kimi prompt caching, InputTokens can be near-zero
  while CacheReadTokens holds the bulk of the context
- Include OutputTokens since assistant output becomes context next turn
- Remove max-only guard in SetContextTokens so context shrinks after
  compaction instead of staying stuck at the high-water mark
- Reset context tokens to 0 after compaction in both SDK and UI layers
- Use real API-reported token counts in ShouldCompact() instead of
  the chars/4 text heuristic which misses system prompts and tool defs
2026-04-10 17:05:47 +03:00

78 lines
3.0 KiB
Go

package app
import (
"context"
"github.com/mark3labs/kit/internal/config"
"github.com/mark3labs/kit/internal/session"
kit "github.com/mark3labs/kit/pkg/kit"
)
// UsageUpdater lets the app layer report token usage once an agent step has
// finished. *ui.UsageTracker (defined in internal/ui) satisfies it; declaring
// the interface here avoids an import cycle, and the concrete type is wired up
// in cmd/root.go, which is able to import both packages.
type UsageUpdater interface {
	// UpdateUsage stores the real token counts reported by the provider.
	// They originate from fantasy's TotalUsage — the aggregate over every
	// step of a multi-step tool-calling run — and feed session cost tracking.
	UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens int)

	// EstimateAndUpdateUsage is the fallback for providers that do not
	// return exact counts: usage is estimated from the input and output text.
	EstimateAndUpdateUsage(inputText, outputText string)

	// SetContextTokens stores the approximate fill level of the current
	// context window. Callers should pass the total of every token category
	// from the most recent API call (InputTokens + CacheReadTokens +
	// CacheCreationTokens + OutputTokens), because with Anthropic prompt
	// caching InputTokens may be close to zero while CacheReadTokens carries
	// most of the context.
	SetContextTokens(tokens int)
}
// Options holds the settings used to configure an App instance.
type Options struct {
	// Kit is the SDK instance: executeStep() hands off to kit.PromptResult()
	// and events arrive through SDK subscriptions. Production code must set
	// it; tests can supply PromptFunc instead.
	Kit *kit.Kit

	// PromptFunc replaces Kit.PromptResult for testing. If it is set,
	// executeStep invokes it directly, skipping SDK event subscription and
	// usage tracking. It must stay unset in production.
	PromptFunc func(ctx context.Context, prompt string) (*kit.TurnResult, error)

	// TreeSession manages the tree-structured JSONL session. A non-nil value
	// persists conversation history as an append-only JSONL tree and turns
	// on tree navigation (/tree, /fork).
	TreeSession *session.TreeManager

	// MCPConfig carries the complete MCP configuration, which is used for
	// session continuation and for resolving slash commands.
	MCPConfig *config.Config

	// ModelName is the model's display name, e.g. "claude-sonnet-4-5".
	ModelName string

	// ServerNames lists the loaded MCP servers by name; it feeds slash
	// command autocomplete.
	ServerNames []string

	// ToolNames lists the available tools by name; it feeds slash command
	// autocomplete.
	ToolNames []string

	// StreamingEnabled selects whether the agent uses streaming responses.
	StreamingEnabled bool

	// Quiet silences all output other than the final response
	// (non-interactive mode).
	Quiet bool

	// Debug turns on verbose debug logging.
	Debug bool

	// UsageTracker optionally receives token usage after each agent step.
	// If it is non-nil, the app layer calls UpdateUsage — or
	// EstimateAndUpdateUsage as a fallback — with the usage data the agent
	// returned. *ui.UsageTracker satisfies it; it is wired in cmd/root.go.
	UsageTracker UsageUpdater
}