From c5b75674a31de23d7fc694c161c320d496b3ec0c Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Thu, 26 Feb 2026 16:10:43 +0300 Subject: [PATCH] fix: token usage tracking with fantasy and sticky display across all visual modes - Fix context percentage: use FinalResponse.Usage (last API call) instead of TotalUsage (sum of all tool-calling steps) which overstated context fill level - Fix token count: display current context window tokens, not cumulative session total, so the number and percentage tell a consistent story - Fix script mode double-counting: app.updateUsage already updates the shared tracker before sending StepCompleteEvent, so remove redundant UpdateUsageFromResponse call - Add sticky usage display in TUI: render in View() layout between stream and separator instead of tea.Println so it updates in place - Add usage display for non-interactive --prompt mode (non-quiet) - Add SetContextTokens to UsageUpdater interface for separating billing tokens (TotalUsage) from context utilization (FinalResponse.Usage) --- cmd/root.go | 11 +++- cmd/script.go | 8 +-- internal/app/app.go | 16 ++++++ internal/app/options.go | 6 +++ internal/ui/cli.go | 6 ++- internal/ui/model.go | 37 +++++++++++-- internal/ui/usage_tracker.go | 67 +++++++++++++++--------- internal/ui/usage_tracker_render_test.go | 4 +- 8 files changed, 119 insertions(+), 36 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index a3de7bd7..95e894ec 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -755,11 +755,20 @@ func runNormalMode(ctx context.Context) error { // // When --no-exit is set, after RunOnce completes the interactive BubbleTea TUI // is started so the user can continue the conversation. -func runNonInteractiveModeApp(ctx context.Context, appInstance *app.App, prompt string, _, noExit bool, modelName, providerName, loadingMessage string, serverNames, toolNames []string, usageTracker *ui.UsageTracker) error { +func runNonInteractiveModeApp(ctx context.Context, appInstance *app.App, prompt string, quiet, noExit bool, modelName, providerName, loadingMessage string, serverNames, toolNames []string, usageTracker *ui.UsageTracker) error { if err := appInstance.RunOnce(ctx, prompt); err != nil { return err } + // Display token usage after the response (unless quiet mode suppresses output). + // The app layer has already updated the tracker inside RunOnce. + if !quiet && usageTracker != nil { + usageInfo := usageTracker.RenderUsageInfo() + if usageInfo != "" { + fmt.Println(usageInfo) + } + } + // If --no-exit was requested, hand off to the interactive TUI. if noExit { return runInteractiveModeBubbleTea(ctx, appInstance, modelName, providerName, loadingMessage, serverNames, toolNames, usageTracker) diff --git a/cmd/script.go b/cmd/script.go index dd369bfb..d487fe95 100644 --- a/cmd/script.go +++ b/cmd/script.go @@ -851,10 +851,10 @@ func (h *scriptEventHandler) Handle(msg tea.Msg) { _ = h.cli.DisplayAssistantMessageWithModel(responseText, h.modelName) } - // Update and display usage. - if e.Response != nil { - h.cli.UpdateUsageFromResponse(e.Response, "") - } + // Display usage. The app layer has already updated the shared + // UsageTracker (via app.updateUsage in RunOnceWithDisplay) before + // sending this event, so we only need to render — calling + // UpdateUsageFromResponse here would double-count. h.cli.DisplayUsageAfterResponse() // Reset for next step in the agentic loop. diff --git a/internal/app/app.go b/internal/app/app.go index 02bacbe0..004e00d9 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -419,6 +419,11 @@ func (a *App) sendEvent(msg tea.Msg) { // updateUsage records token usage from a completed agent step into the configured // UsageTracker (if any). It uses the actual token counts from the agent result's // TotalUsage field when available; otherwise it falls back to text-based estimation. +// +// TotalUsage is the sum across all tool-calling steps in a single agent run and +// is used for session cost tracking. For context window utilization we use the +// final response's per-call usage (FinalResponse.Usage) which reflects the actual +// context size at the last API call. func (a *App) updateUsage(result *agent.GenerateWithLoopResult, userPrompt string) { if a.opts.UsageTracker == nil || result == nil { return @@ -438,5 +443,16 @@ func (a *App) updateUsage(result *agent.GenerateWithLoopResult, userPrompt strin responseText = result.FinalResponse.Content.Text() } a.opts.UsageTracker.EstimateAndUpdateUsage(userPrompt, responseText) + return // EstimateAndUpdateUsage already sets context tokens internally + } + + // Set context window utilization from the final API call's per-step usage. + // FinalResponse.Usage represents the last step only (not the aggregate), + // so input+output there reflects the actual context fill level. + if result.FinalResponse != nil { + fu := result.FinalResponse.Usage + if ct := int(fu.InputTokens) + int(fu.OutputTokens); ct > 0 { + a.opts.UsageTracker.SetContextTokens(ct) + } } } diff --git a/internal/app/options.go b/internal/app/options.go index c563ae82..9b9cc8f8 100644 --- a/internal/app/options.go +++ b/internal/app/options.go @@ -32,10 +32,16 @@ type AgentRunner interface { // in cmd/root.go, which can import both packages. type UsageUpdater interface { // UpdateUsage records actual token counts returned by the provider. + // The counts come from fantasy's TotalUsage (aggregate across all steps + // in a multi-step tool-calling run) and are used for session cost tracking. UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens int) // EstimateAndUpdateUsage falls back to text-based token estimation when // the provider does not return exact counts. EstimateAndUpdateUsage(inputText, outputText string) + // SetContextTokens records the approximate current context window fill + // level. This should be the final API call's input+output tokens (from + // FinalResponse.Usage), NOT the aggregate TotalUsage. + SetContextTokens(tokens int) } // Options configures an App instance. It mirrors the fields from AgenticLoopConfig diff --git a/internal/ui/cli.go b/internal/ui/cli.go index 1b2abfe9..459fe72a 100644 --- a/internal/ui/cli.go +++ b/internal/ui/cli.go @@ -262,8 +262,12 @@ func (c *CLI) UpdateUsageFromResponse(response *fantasy.Response, inputText stri cacheReadTokens := int(usage.CacheReadTokens) cacheWriteTokens := int(usage.CacheCreationTokens) c.usageTracker.UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens) + // Per-response usage is a single API call, so it represents the + // actual context window fill level. + c.usageTracker.SetContextTokens(inputTokens + outputTokens) } else { - // Fallback to estimation if no metadata is available + // Fallback to estimation if no metadata is available. + // EstimateAndUpdateUsage sets context tokens internally. c.usageTracker.EstimateAndUpdateUsage(inputText, response.Content.Text()) } } diff --git a/internal/ui/model.go b/internal/ui/model.go index d9b85714..47ae15f7 100644 --- a/internal/ui/model.go +++ b/internal/ui/model.go @@ -433,7 +433,9 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { case app.StepCompleteEvent: // Flush any remaining streamed text to scrollback, then reset stream - // and return to input state. + // and return to input state. Token usage is rendered as a sticky + // element in View() — the app layer has already updated the shared + // UsageTracker before sending this event. cmds = append(cmds, m.flushStreamContent()) if m.stream != nil { m.stream.Reset() @@ -479,13 +481,21 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { } // View implements tea.Model. It renders the stacked layout: -// stream region + separator + [queued messages] + input region. +// stream region + [usage info] + separator + [queued messages] + input region. func (m *AppModel) View() tea.View { streamView := m.renderStream() separator := m.renderSeparator() inputView := m.renderInput() - parts := []string{streamView, separator} + parts := []string{streamView} + + // Sticky usage info sits between the stream and separator so it is + // always visible at the bottom of the messages area and updates in place. + if usageView := m.renderUsageInfo(); usageView != "" { + parts = append(parts, usageView) + } + + parts = append(parts, separator) if queuedView := m.renderQueuedMessages(); queuedView != "" { parts = append(parts, queuedView) @@ -523,6 +533,16 @@ func (m *AppModel) renderStream() string { return m.stream.View().Content } +// renderUsageInfo returns the sticky token usage line (tokens + context% + cost). +// Returns an empty string when no usage data is available (no requests yet or +// tracker is nil), so the element is invisible until the first response arrives. +func (m *AppModel) renderUsageInfo() string { + if m.usageTracker == nil { + return "" + } + return m.usageTracker.RenderUsageInfo() +} + // renderSeparator renders the separator line with an optional queue count badge. func (m *AppModel) renderSeparator() string { theme := GetTheme() @@ -796,7 +816,8 @@ func (m *AppModel) flushStreamContent() tea.Cmd { // // Layout (line counts): // -// stream region = total - separator(1) - queued(N*5) - input(5) +// stream region = total - usage(0-1) - separator(1) - queued(N*5) - input(5) +// usage info = 0 or 1 line (visible only after first response) // separator = 1 line // queued msgs = ~5 lines per message (padding + text + badge + padding) // input region = 5 lines: title(1) + textarea(3) + help(1) @@ -806,7 +827,13 @@ func (m *AppModel) distributeHeight() { const linesPerQueuedMsg = 5 queuedLines := len(m.queuedMessages) * linesPerQueuedMsg - streamHeight := max(m.height-separatorLines-queuedLines-inputLines, 0) + // Reserve space for the sticky usage line when the tracker has data. + usageLines := 0 + if m.usageTracker != nil && m.usageTracker.GetSessionStats().RequestCount > 0 { + usageLines = 1 + } + + streamHeight := max(m.height-usageLines-separatorLines-queuedLines-inputLines, 0) if m.stream != nil { m.stream.SetHeight(streamHeight) diff --git a/internal/ui/usage_tracker.go b/internal/ui/usage_tracker.go index 1a9879e5..91439fdd 100644 --- a/internal/ui/usage_tracker.go +++ b/internal/ui/usage_tracker.go @@ -41,13 +41,14 @@ type SessionStats struct { // for LLM interactions throughout a session. It provides real-time usage information // and supports both estimated and actual token counts. OAuth users see $0 costs. type UsageTracker struct { - mu sync.RWMutex - modelInfo *models.ModelInfo - provider string - sessionStats SessionStats - lastRequest *UsageStats - width int - isOAuth bool // Whether OAuth credentials are being used (costs should be $0) + mu sync.RWMutex + modelInfo *models.ModelInfo + provider string + sessionStats SessionStats + lastRequest *UsageStats + contextTokens int // approximate current context window utilization (last API call) + width int + isOAuth bool // Whether OAuth credentials are being used (costs should be $0) } // NewUsageTracker creates and initializes a new UsageTracker for the specified model. @@ -119,11 +120,27 @@ func (ut *UsageTracker) UpdateUsage(inputTokens, outputTokens, cacheReadTokens, // EstimateAndUpdateUsage estimates token counts from raw text strings and updates // the usage statistics. This method is used when actual token counts are not available -// from the API response. +// from the API response. The estimated values also serve as the context utilization +// approximation since they represent a single API call. func (ut *UsageTracker) EstimateAndUpdateUsage(inputText, outputText string) { inputTokens := estimateTokens(inputText) outputTokens := estimateTokens(outputText) ut.UpdateUsage(inputTokens, outputTokens, 0, 0) + // For estimated usage the values represent a single call, so they are a + // reasonable proxy for the current context window fill level. + ut.mu.Lock() + ut.contextTokens = inputTokens + outputTokens + ut.mu.Unlock() +} + +// SetContextTokens records the approximate current context window utilization. +// This should be set from the final API call's input + output tokens (i.e. +// FinalResponse.Usage) rather than the aggregate TotalUsage, because TotalUsage +// sums across all tool-calling steps and overstates the actual window fill level. +func (ut *UsageTracker) SetContextTokens(tokens int) { + ut.mu.Lock() + defer ut.mu.Unlock() + ut.contextTokens = tokens } // RenderUsageInfo generates a formatted string displaying current usage statistics @@ -134,31 +151,32 @@ func (ut *UsageTracker) RenderUsageInfo() string { ut.mu.RLock() defer ut.mu.RUnlock() - // if ut.sessionStats.RequestCount == 0 { - // return "" - // } + if ut.sessionStats.RequestCount == 0 { + return "" + } - // Import lipgloss for styling baseStyle := lipgloss.NewStyle() - // Calculate total tokens - totalTokens := ut.sessionStats.TotalInputTokens + ut.sessionStats.TotalOutputTokens + // Display the current context window token count (from the last API call), + // not the cumulative session total. This keeps the number consistent with + // the percentage and answers "how full is my context right now?". + displayTokens := ut.contextTokens // Format tokens with K/M suffix for better readability var tokenStr string - if totalTokens >= 1000000 { - tokenStr = fmt.Sprintf("%.1fM", float64(totalTokens)/1000000) - } else if totalTokens >= 1000 { - tokenStr = fmt.Sprintf("%.1fK", float64(totalTokens)/1000) + if displayTokens >= 1000000 { + tokenStr = fmt.Sprintf("%.1fM", float64(displayTokens)/1000000) + } else if displayTokens >= 1000 { + tokenStr = fmt.Sprintf("%.1fK", float64(displayTokens)/1000) } else { - tokenStr = fmt.Sprintf("%d", totalTokens) + tokenStr = fmt.Sprintf("%d", displayTokens) } - // Calculate percentage based on context limit with color coding + // Calculate context window utilization percentage from the same value. var percentageStr string var percentageColor color.Color - if ut.modelInfo.Limit.Context > 0 { - percentage := float64(totalTokens) / float64(ut.modelInfo.Limit.Context) * 100 + if ut.modelInfo.Limit.Context > 0 && displayTokens > 0 { + percentage := float64(displayTokens) / float64(ut.modelInfo.Limit.Context) * 100 // Color code based on usage percentage theme := GetTheme() @@ -202,8 +220,8 @@ func (ut *UsageTracker) RenderUsageInfo() string { Foreground(theme.Muted). Render(" | Cost: ") - // Build the enhanced display - return fmt.Sprintf("%s%s%s%s%s\n", + // Build the enhanced display (no trailing newline — callers control spacing). + return fmt.Sprintf("%s%s%s%s%s", tokensLabel, tokensValue, percentageStr, costLabel, costStr) } @@ -237,6 +255,7 @@ func (ut *UsageTracker) Reset() { defer ut.mu.Unlock() ut.sessionStats = SessionStats{} ut.lastRequest = nil + ut.contextTokens = 0 } // SetWidth updates the terminal width used for formatting usage information display. diff --git a/internal/ui/usage_tracker_render_test.go b/internal/ui/usage_tracker_render_test.go index e683b587..68a192b8 100644 --- a/internal/ui/usage_tracker_render_test.go +++ b/internal/ui/usage_tracker_render_test.go @@ -32,7 +32,8 @@ func TestUsageTracker_RenderUsageInfo_OAuth(t *testing.T) { // Test OAuth rendering (should show $0.00) oauthTracker := NewUsageTracker(modelInfo, "anthropic", 80, true) - oauthTracker.UpdateUsage(1500, 500, 0, 0) // 2000 total tokens + oauthTracker.UpdateUsage(1500, 500, 0, 0) // 2000 total tokens (session/billing) + oauthTracker.SetContextTokens(1500 + 500) // context window utilization rendered := stripAnsi(oauthTracker.RenderUsageInfo()) @@ -50,6 +51,7 @@ func TestUsageTracker_RenderUsageInfo_OAuth(t *testing.T) { // Test regular API key rendering (should show actual cost) regularTracker := NewUsageTracker(modelInfo, "anthropic", 80, false) regularTracker.UpdateUsage(1500, 500, 0, 0) // Same token usage + regularTracker.SetContextTokens(1500 + 500) // context window utilization regularRendered := stripAnsi(regularTracker.RenderUsageInfo())