feat: update token usage after each step in multi-step turns

Previously, token usage and costs were only updated at the end of a complete turn. For long-running multi-step tool-calling conversations, this meant the status bar showed stale (or zero) costs during the entire interaction. Now, after each complete step (tool call + result), the usage tracker is updated with the actual token counts from that step. This provides real-time cost accumulation visible in the status bar.

Changes:
- Add StepUsageHandler type and onStepUsage parameter to agent
- Emit StepUsageEvent from kit layer after each step completes
- Handle StepUsageEvent in app layer to update UsageTracker
- Add EventStepUsage constant and StepUsageEvent struct to events

The step usage is additive: each step's tokens are added to the running session totals, just like the final turn usage was before.
fix: update token counting when switching models mid-session
2026-06-21 14:39:38 +00:00 · 2026-03-25 18:17:48 +03:00 · 2026-03-25 18:09:36 +03:00 · 2026-03-25 18:02:50 +03:00
7 changed files with 129 additions and 4 deletions
@@ -13,6 +13,7 @@ import (
 	"charm.land/fantasy"
 	"charm.land/lipgloss/v2"
 	"github.com/mark3labs/kit/internal/app"
+	"github.com/mark3labs/kit/internal/auth"
 	"github.com/mark3labs/kit/internal/config"
 	"github.com/mark3labs/kit/internal/extensions"
 	"github.com/mark3labs/kit/internal/models"
@@ -955,6 +956,24 @@ func runNormalMode(ctx context.Context) error {
 				kitInstance.UpdateExtensionContextModel(modelString)
 				// Fire OnModelChange event to extensions.
 				kitInstance.EmitModelChange(modelString, previousModel, "extension")
+				// Update usage tracker with new model info for correct token counting.
+				if usageTracker != nil {
+					newProvider, newModel, _ := models.ParseModelString(modelString)
+					if newProvider != "unknown" && newModel != "unknown" && newProvider != "ollama" {
+						registry := models.GetGlobalRegistry()
+						if modelInfo := registry.LookupModel(newProvider, newModel); modelInfo != nil {
+							// Check OAuth status for Anthropic models
+							isOAuth := false
+							if newProvider == "anthropic" {
+								_, source, err := auth.GetAnthropicAPIKey(viper.GetString("provider-api-key"))
+								if err == nil && strings.HasPrefix(source, "stored OAuth") {
+									isOAuth = true
+								}
+							}
+							usageTracker.UpdateModelInfo(modelInfo, newProvider, isOAuth)
+						}
+					}
+				}
 				return nil
 			},
 			GetAvailableModels: func() []extensions.ModelInfoEntry {
@@ -1152,6 +1171,24 @@ func runNormalMode(ctx context.Context) error {
 		// this callback runs synchronously inside BubbleTea's Update(), and
 		// NotifyModelChanged calls prog.Send() which deadlocks. The UI layer
 		// updates m.providerName and m.modelName directly after setModel returns.
+		// Update usage tracker with new model info for correct token counting.
+		if usageTracker != nil {
+			newProvider, newModel, _ := models.ParseModelString(modelString)
+			if newProvider != "unknown" && newModel != "unknown" && newProvider != "ollama" {
+				registry := models.GetGlobalRegistry()
+				if modelInfo := registry.LookupModel(newProvider, newModel); modelInfo != nil {
+					// Check OAuth status for Anthropic models
+					isOAuth := false
+					if newProvider == "anthropic" {
+						_, source, err := auth.GetAnthropicAPIKey(viper.GetString("provider-api-key"))
+						if err == nil && strings.HasPrefix(source, "stored OAuth") {
+							isOAuth = true
+						}
+					}
+					usageTracker.UpdateModelInfo(modelInfo, newProvider, isOAuth)
+				}
+			}
+		}
 		return nil
 	}
 	emitModelChangeForUI := func(newModel, previousModel, source string) {
@@ -70,6 +70,11 @@ type ReasoningDeltaHandler func(delta string)
 // Note: This is an alias for core.ToolOutputCallback to avoid import cycles.
 type ToolOutputHandler = core.ToolOutputCallback

+// StepUsageHandler is a function type for handling token usage after each
+// complete step in a multi-step agent turn. This enables real-time cost
+// tracking during long-running tool-calling conversations.
+//
+// The arguments are the token counts reported for that single step (input,
+// output, cache-read and cache-creation), not running totals. A nil handler
+// is allowed; the agent skips the call in that case.
+type StepUsageHandler func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64)
+
 // Agent represents an AI agent with core tool integration using the fantasy library.
 // Core tools (bash, read, write, edit, grep, find, ls) are registered as direct
 // fantasy.AgentTool implementations — no MCP layer, no serialization overhead.
@@ -225,7 +230,7 @@ func (a *Agent) GenerateWithLoop(ctx context.Context, messages []fantasy.Message
 	onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
 ) (*GenerateWithLoopResult, error) {
 	return a.GenerateWithLoopAndStreaming(ctx, messages, onToolCall, onToolExecution, onToolResult,
-		onResponse, onToolCallContent, nil, nil, nil)
+		onResponse, onToolCallContent, nil, nil, nil, nil)
 }

 // GenerateWithLoopAndStreaming processes messages using the fantasy agent with streaming and callbacks.
@@ -237,6 +242,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 	onStreamingResponse StreamingResponseHandler,
 	onReasoningDelta ReasoningDeltaHandler,
 	onToolOutput ToolOutputHandler,
+	onStepUsage StepUsageHandler,
 ) (*GenerateWithLoopResult, error) {

 	// Inject tool output handler into context for use by core tools (e.g., bash).
@@ -351,6 +357,11 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 				if text != "" && len(toolCalls) > 0 && onToolCallContent != nil {
 					onToolCallContent(text)
 				}
+				// Emit step usage for real-time cost tracking
+				if onStepUsage != nil {
+					onStepUsage(step.Usage.InputTokens, step.Usage.OutputTokens,
+						step.Usage.CacheReadTokens, step.Usage.CacheCreationTokens)
+				}
 				return nil
 			},
 		})
@@ -678,6 +678,15 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg)) func() {
 				Chunk:      ev.Chunk,
 				IsStderr:   ev.IsStderr,
 			})
+		case kit.StepUsageEvent:
+			if a.opts.UsageTracker != nil {
+				a.opts.UsageTracker.UpdateUsage(
+					int(ev.InputTokens),
+					int(ev.OutputTokens),
+					int(ev.CacheReadTokens),
+					int(ev.CacheWriteTokens),
+				)
+			}
 		}
 	}))

@@ -1742,6 +1742,8 @@ func (m *AppModel) renderStream() string {

 // renderStreamingBashOutput renders accumulated streaming bash output (stdout + stderr)
 // below the LLM streaming text. Returns empty string if no bash output is present.
+// Lines are truncated to the terminal width and capped to maxBashLines to prevent
+// long-running commands from blowing up the TUI layout.
 func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
 	m.streamingMu.RLock()
 	stdoutLines := make([]string, len(m.streamingBashOutput))
@@ -1755,7 +1757,9 @@ func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
 	}

 	const lineIndent = "  "
-	width := m.width - 2 // Account for indent and padding
+	lineWidth := max(m.width-2-len(lineIndent), 20)
+	// Account for PaddingLeft(1) on the output/stderr styles.
+	maxLineChars := lineWidth - 1

 	outputStyle := lipgloss.NewStyle().
 		Background(theme.CodeBg).
@@ -1766,17 +1770,47 @@ func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
 		Background(theme.CodeBg).
 		PaddingLeft(1)

+	// Cap displayed lines to maxBashLines (show the tail, since streaming
+	// output is most useful at the end). The buffer itself is larger to
+	// preserve context, but we only render the last N lines.
+	totalLines := len(stdoutLines) + len(stderrLines)
+	var hiddenCount int
+	if totalLines > maxBashLines {
+		hiddenCount = totalLines - maxBashLines
+		// Trim from stdout first (older output), then stderr.
+		remaining := maxBashLines
+		if len(stderrLines) >= remaining {
+			stdoutLines = nil
+			stderrLines = stderrLines[len(stderrLines)-remaining:]
+		} else {
+			remaining -= len(stderrLines)
+			if len(stdoutLines) > remaining {
+				stdoutLines = stdoutLines[len(stdoutLines)-remaining:]
+			}
+		}
+	}
+
 	var lines []string

+	// Truncation hint at the top.
+	if hiddenCount > 0 {
+		hint := fmt.Sprintf("...(%d more lines above)", hiddenCount)
+		hintContent := outputStyle.Width(lineWidth).
+			Foreground(theme.Muted).Italic(true).Render(hint)
+		lines = append(lines, lineIndent+hintContent)
+	}
+
 	// Render stdout lines.
 	for _, line := range stdoutLines {
-		styled := outputStyle.Width(width - len(lineIndent)).Render(line)
+		line = truncateLine(strings.TrimRight(line, "\n"), maxLineChars)
+		styled := outputStyle.Width(lineWidth).Render(line)
 		lines = append(lines, lineIndent+styled)
 	}

 	// Render stderr lines with error styling.
 	for _, line := range stderrLines {
-		styled := stderrStyle.Width(width - len(lineIndent)).Render(line)
+		line = truncateLine(strings.TrimRight(line, "\n"), maxLineChars)
+		styled := stderrStyle.Width(lineWidth).Render(line)
 		lines = append(lines, lineIndent+styled)
 	}

@@ -266,3 +266,14 @@ func (ut *UsageTracker) SetWidth(width int) {
 	defer ut.mu.Unlock()
 	ut.width = width
 }
+
+// UpdateModelInfo updates the model information and OAuth status when the model
+// is switched mid-session. This ensures token costs and context limits are
+// calculated correctly for the new model.
+//
+// All three fields are replaced together while holding ut.mu. modelInfo is
+// stored as given, with no nil check.
+func (ut *UsageTracker) UpdateModelInfo(modelInfo *models.ModelInfo, provider string, isOAuth bool) {
+	ut.mu.Lock()
+	defer ut.mu.Unlock()
+	ut.modelInfo = modelInfo
+	ut.provider = provider
+	ut.isOAuth = isOAuth
+}
@@ -41,6 +41,7 @@ const (
 	EventReasoningDelta EventType = "reasoning_delta"
 	// EventToolOutput fires when a tool produces streaming output chunks.
 	EventToolOutput EventType = "tool_output"
+	// EventStepUsage fires after each complete step in a multi-step agent
+	// turn, carrying the token usage for that step.
+	EventStepUsage EventType = "step_usage"
 )

 // ---------------------------------------------------------------------------
@@ -249,6 +250,19 @@ type ResponseEvent struct {
 // EventType implements Event.
 func (e ResponseEvent) EventType() EventType { return EventResponse }

+// StepUsageEvent fires after each complete step in a multi-step agent turn,
+// carrying the token usage for that specific step. This enables real-time
+// cost tracking during long-running tool-calling conversations.
+type StepUsageEvent struct {
+	InputTokens      uint64 // input tokens reported for the step
+	OutputTokens     uint64 // output tokens reported for the step
+	CacheReadTokens  uint64 // cache-read tokens reported for the step
+	CacheWriteTokens uint64 // cache-creation tokens reported for the step
+}
+
+// EventType implements Event.
+func (e StepUsageEvent) EventType() EventType { return EventStepUsage }
+
 // CompactionEvent fires after a successful compaction.
 type CompactionEvent struct {
 	Summary         string
@@ -1491,6 +1491,15 @@ func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.
 				IsStderr:   isStderr,
 			})
 		},
+		func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64) {
+			// Emit step usage event for real-time cost tracking
+			m.events.emit(StepUsageEvent{
+				InputTokens:      uint64(inputTokens),
+				OutputTokens:     uint64(outputTokens),
+				CacheReadTokens:  uint64(cacheReadTokens),
+				CacheWriteTokens: uint64(cacheCreationTokens),
+			})
+		},
 	)
 }