Compare commits

...

3 Commits

Author SHA1 Message Date
Ed Zynda 09919b6307 feat: update token usage after each step in multi-step turns
Previously, token usage and costs were only updated at the end of a complete
turn. For long-running multi-step tool-calling conversations, this meant the
status bar showed stale (or zero) costs during the entire interaction.

Now, after each complete step (tool call + result), the usage tracker is
updated with the actual token counts from that step. This provides real-time
cost accumulation visible in the status bar.

Changes:
- Add StepUsageHandler type and onStepUsage parameter to agent
- Emit StepUsageEvent from kit layer after each step completes
- Handle StepUsageEvent in app layer to update UsageTracker
- Add EventStepUsage constant and StepUsageEvent struct to events

The step usage is additive - each step's tokens are added to the running
session totals, just like the final turn usage was before.
2026-03-25 18:17:48 +03:00
Ed Zynda 7a2de4cc3c fix: update token counting when switching models mid-session
When switching models (e.g., via /model command or ctx.SetModel), the usage
tracker now updates its model info to reflect the new model's:
- Pricing for cost calculations
- Context limits for percentage display
- OAuth status (to show $0 costs when using OAuth creds)

Previously, token costs and context percentages continued using the old
model's settings after a switch, causing incorrect display for:
- Users switching from paid to free/OAuth models
- Users switching between models with different pricing

Changes:
- Add UpdateModelInfo() method to UsageTracker
- Call UpdateModelInfo() in both SetModel callbacks (extension and UI)
- Add auth import for OAuth detection in root.go
2026-03-25 18:09:36 +03:00
Ed Zynda acd7fd7f45 feat(ui): add line truncation to bash streaming output
Add width and count truncation to renderStreamingBashOutput to prevent
long-running commands from blowing up the TUI layout:

- Per-line width truncation via truncateLine() (ANSI-aware, matches final
  bash tool renderer behavior)
- Display cap at maxBashLines (20) showing the tail (latest output)
- Truncation hint '...(N more lines above)' when lines are hidden

The buffer still accumulates up to 50 lines for context, but only the
last 20 are rendered during streaming. This is consistent with how the
final bash tool result is displayed.
2026-03-25 18:02:50 +03:00
7 changed files with 129 additions and 4 deletions
+37
View File
@@ -13,6 +13,7 @@ import (
"charm.land/fantasy"
"charm.land/lipgloss/v2"
"github.com/mark3labs/kit/internal/app"
"github.com/mark3labs/kit/internal/auth"
"github.com/mark3labs/kit/internal/config"
"github.com/mark3labs/kit/internal/extensions"
"github.com/mark3labs/kit/internal/models"
@@ -955,6 +956,24 @@ func runNormalMode(ctx context.Context) error {
kitInstance.UpdateExtensionContextModel(modelString)
// Fire OnModelChange event to extensions.
kitInstance.EmitModelChange(modelString, previousModel, "extension")
// Update usage tracker with new model info for correct token counting.
if usageTracker != nil {
newProvider, newModel, _ := models.ParseModelString(modelString)
if newProvider != "unknown" && newModel != "unknown" && newProvider != "ollama" {
registry := models.GetGlobalRegistry()
if modelInfo := registry.LookupModel(newProvider, newModel); modelInfo != nil {
// Check OAuth status for Anthropic models
isOAuth := false
if newProvider == "anthropic" {
_, source, err := auth.GetAnthropicAPIKey(viper.GetString("provider-api-key"))
if err == nil && strings.HasPrefix(source, "stored OAuth") {
isOAuth = true
}
}
usageTracker.UpdateModelInfo(modelInfo, newProvider, isOAuth)
}
}
}
return nil
},
GetAvailableModels: func() []extensions.ModelInfoEntry {
@@ -1152,6 +1171,24 @@ func runNormalMode(ctx context.Context) error {
// this callback runs synchronously inside BubbleTea's Update(), and
// NotifyModelChanged calls prog.Send() which deadlocks. The UI layer
// updates m.providerName and m.modelName directly after setModel returns.
// Update usage tracker with new model info for correct token counting.
if usageTracker != nil {
newProvider, newModel, _ := models.ParseModelString(modelString)
if newProvider != "unknown" && newModel != "unknown" && newProvider != "ollama" {
registry := models.GetGlobalRegistry()
if modelInfo := registry.LookupModel(newProvider, newModel); modelInfo != nil {
// Check OAuth status for Anthropic models
isOAuth := false
if newProvider == "anthropic" {
_, source, err := auth.GetAnthropicAPIKey(viper.GetString("provider-api-key"))
if err == nil && strings.HasPrefix(source, "stored OAuth") {
isOAuth = true
}
}
usageTracker.UpdateModelInfo(modelInfo, newProvider, isOAuth)
}
}
}
return nil
}
emitModelChangeForUI := func(newModel, previousModel, source string) {
+12 -1
View File
@@ -70,6 +70,11 @@ type ReasoningDeltaHandler func(delta string)
// Note: This is an alias for core.ToolOutputCallback to avoid import cycles.
type ToolOutputHandler = core.ToolOutputCallback
// StepUsageHandler is a function type for handling token usage after each
// complete step in a multi-step agent turn. This enables real-time cost
// tracking during long-running tool-calling conversations.
//
// The arguments are, in order, the step's input, output, cache-read, and
// cache-creation token counts. They describe that one step only, not a
// running total, so a handler that wants session totals must accumulate them
// itself. A nil handler is allowed: the agent checks for nil before invoking
// it, and GenerateWithLoop passes nil.
type StepUsageHandler func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64)
// Agent represents an AI agent with core tool integration using the fantasy library.
// Core tools (bash, read, write, edit, grep, find, ls) are registered as direct
// fantasy.AgentTool implementations — no MCP layer, no serialization overhead.
@@ -225,7 +230,7 @@ func (a *Agent) GenerateWithLoop(ctx context.Context, messages []fantasy.Message
onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
) (*GenerateWithLoopResult, error) {
return a.GenerateWithLoopAndStreaming(ctx, messages, onToolCall, onToolExecution, onToolResult,
onResponse, onToolCallContent, nil, nil, nil)
onResponse, onToolCallContent, nil, nil, nil, nil)
}
// GenerateWithLoopAndStreaming processes messages using the fantasy agent with streaming and callbacks.
@@ -237,6 +242,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
onStreamingResponse StreamingResponseHandler,
onReasoningDelta ReasoningDeltaHandler,
onToolOutput ToolOutputHandler,
onStepUsage StepUsageHandler,
) (*GenerateWithLoopResult, error) {
// Inject tool output handler into context for use by core tools (e.g., bash).
@@ -351,6 +357,11 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
if text != "" && len(toolCalls) > 0 && onToolCallContent != nil {
onToolCallContent(text)
}
// Emit step usage for real-time cost tracking
if onStepUsage != nil {
onStepUsage(step.Usage.InputTokens, step.Usage.OutputTokens,
step.Usage.CacheReadTokens, step.Usage.CacheCreationTokens)
}
return nil
},
})
+9
View File
@@ -678,6 +678,15 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg)) func() {
Chunk: ev.Chunk,
IsStderr: ev.IsStderr,
})
case kit.StepUsageEvent:
if a.opts.UsageTracker != nil {
a.opts.UsageTracker.UpdateUsage(
int(ev.InputTokens),
int(ev.OutputTokens),
int(ev.CacheReadTokens),
int(ev.CacheWriteTokens),
)
}
}
}))
+37 -3
View File
@@ -1742,6 +1742,8 @@ func (m *AppModel) renderStream() string {
// renderStreamingBashOutput renders accumulated streaming bash output (stdout + stderr)
// below the LLM streaming text. Returns empty string if no bash output is present.
// Lines are truncated to the terminal width and capped to maxBashLines to prevent
// long-running commands from blowing up the TUI layout.
func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
m.streamingMu.RLock()
stdoutLines := make([]string, len(m.streamingBashOutput))
@@ -1755,7 +1757,9 @@ func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
}
const lineIndent = " "
width := m.width - 2 // Account for indent and padding
lineWidth := max(m.width-2-len(lineIndent), 20)
// Account for PaddingLeft(1) on the output/stderr styles.
maxLineChars := lineWidth - 1
outputStyle := lipgloss.NewStyle().
Background(theme.CodeBg).
@@ -1766,17 +1770,47 @@ func (m *AppModel) renderStreamingBashOutput(theme Theme) string {
Background(theme.CodeBg).
PaddingLeft(1)
// Cap displayed lines to maxBashLines (show the tail, since streaming
// output is most useful at the end). The buffer itself is larger to
// preserve context, but we only render the last N lines.
totalLines := len(stdoutLines) + len(stderrLines)
var hiddenCount int
if totalLines > maxBashLines {
hiddenCount = totalLines - maxBashLines
// Trim from stdout first (older output), then stderr.
remaining := maxBashLines
if len(stderrLines) >= remaining {
stdoutLines = nil
stderrLines = stderrLines[len(stderrLines)-remaining:]
} else {
remaining -= len(stderrLines)
if len(stdoutLines) > remaining {
stdoutLines = stdoutLines[len(stdoutLines)-remaining:]
}
}
}
var lines []string
// Truncation hint at the top.
if hiddenCount > 0 {
hint := fmt.Sprintf("...(%d more lines above)", hiddenCount)
hintContent := outputStyle.Width(lineWidth).
Foreground(theme.Muted).Italic(true).Render(hint)
lines = append(lines, lineIndent+hintContent)
}
// Render stdout lines.
for _, line := range stdoutLines {
styled := outputStyle.Width(width - len(lineIndent)).Render(line)
line = truncateLine(strings.TrimRight(line, "\n"), maxLineChars)
styled := outputStyle.Width(lineWidth).Render(line)
lines = append(lines, lineIndent+styled)
}
// Render stderr lines with error styling.
for _, line := range stderrLines {
styled := stderrStyle.Width(width - len(lineIndent)).Render(line)
line = truncateLine(strings.TrimRight(line, "\n"), maxLineChars)
styled := stderrStyle.Width(lineWidth).Render(line)
lines = append(lines, lineIndent+styled)
}
+11
View File
@@ -266,3 +266,14 @@ func (ut *UsageTracker) SetWidth(width int) {
defer ut.mu.Unlock()
ut.width = width
}
// UpdateModelInfo replaces the tracker's model metadata, provider name, and
// OAuth flag in one step. It is meant to be called when the active model is
// switched mid-session so that later cost and context-limit calculations use
// the new model's settings. The write lock is held for the whole update, so
// the three fields are swapped together.
func (ut *UsageTracker) UpdateModelInfo(modelInfo *models.ModelInfo, provider string, isOAuth bool) {
	ut.mu.Lock()
	defer ut.mu.Unlock()

	ut.modelInfo, ut.provider, ut.isOAuth = modelInfo, provider, isOAuth
}
+14
View File
@@ -41,6 +41,7 @@ const (
EventReasoningDelta EventType = "reasoning_delta"
// EventToolOutput fires when a tool produces streaming output chunks.
EventToolOutput EventType = "tool_output"
EventStepUsage EventType = "step_usage"
)
// ---------------------------------------------------------------------------
@@ -249,6 +250,19 @@ type ResponseEvent struct {
// EventType implements Event.
func (e ResponseEvent) EventType() EventType { return EventResponse }
// StepUsageEvent fires after each complete step in a multi-step agent turn,
// carrying the token usage for that specific step. This enables real-time
// cost tracking during long-running tool-calling conversations.
//
// All counts are for the single step that just finished, not cumulative
// totals. The emitter fills them by converting the agent's int64 counts to
// uint64.
type StepUsageEvent struct {
// InputTokens is the number of input tokens reported for the step.
InputTokens uint64
// OutputTokens is the number of output tokens reported for the step.
OutputTokens uint64
// CacheReadTokens is the number of tokens reported as read from cache.
CacheReadTokens uint64
// CacheWriteTokens is the number of tokens reported as written to cache;
// the agent layer names the same value cacheCreationTokens.
CacheWriteTokens uint64
}
// EventType implements Event by reporting EventStepUsage as this event's type.
func (e StepUsageEvent) EventType() EventType { return EventStepUsage }
// CompactionEvent fires after a successful compaction.
type CompactionEvent struct {
Summary string
+9
View File
@@ -1491,6 +1491,15 @@ func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.
IsStderr: isStderr,
})
},
func(inputTokens, outputTokens, cacheReadTokens, cacheCreationTokens int64) {
// Emit step usage event for real-time cost tracking
m.events.emit(StepUsageEvent{
InputTokens: uint64(inputTokens),
OutputTokens: uint64(outputTokens),
CacheReadTokens: uint64(cacheReadTokens),
CacheWriteTokens: uint64(cacheCreationTokens),
})
},
)
}