fix token tracking

This commit is contained in:
Ed Zynda
2025-06-27 11:33:44 +03:00
parent daf5c41ac9
commit acade83e37
5 changed files with 95 additions and 44 deletions
+22 -14
View File
@@ -794,6 +794,23 @@ func runAgenticStep(ctx context.Context, mcpAgent *agent.Agent, cli *ui.CLI, mes
response := result.FinalResponse
conversationMessages := result.ConversationMessages
// Extract the last user message for usage tracking (do this once)
lastUserMessage := ""
if len(messages) > 0 {
// Find the last user message
for i := len(messages) - 1; i >= 0; i-- {
if messages[i].Role == schema.User {
lastUserMessage = messages[i].Content
break
}
}
}
// Update usage tracking for ALL responses (streaming and non-streaming)
if !config.Quiet && cli != nil {
cli.UpdateUsageFromResponse(response, lastUserMessage)
}
// Display assistant response with model name
// Skip if: quiet mode, same content already displayed, or if streaming completed the full response
streamedFullResponse := responseWasStreamed && streamingContent.String() == response.Content
@@ -802,25 +819,16 @@ func runAgenticStep(ctx context.Context, mcpAgent *agent.Agent, cli *ui.CLI, mes
cli.DisplayError(fmt.Errorf("display error: %v", err))
return nil, nil, err
}
} else if streamedFullResponse {
// Streaming was used - the message is already displayed in the message component
// Just update usage tracking with the last user message and response
if len(messages) > 0 {
lastUserMessage := ""
// Find the last user message
for i := len(messages) - 1; i >= 0; i-- {
if messages[i].Role == schema.User {
lastUserMessage = messages[i].Content
break
}
}
cli.UpdateUsageFromResponse(response, lastUserMessage)
}
} else if config.Quiet {
// In quiet mode, only output the final response content to stdout
fmt.Print(response.Content)
}
// Display usage information immediately after the response (for both streaming and non-streaming)
if !config.Quiet && cli != nil {
cli.DisplayUsageAfterResponse()
}
// Return the final response and all conversation messages
return response, conversationMessages, nil
}
+2 -10
View File
@@ -158,8 +158,8 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []*sc
// Add response to working messages
workingMessages = append(workingMessages, response)
// Check if this is a tool call or final response
if len(response.ToolCalls) > 0 {
// Check if this is a tool call or final response
if len(response.ToolCalls) > 0 {
// Display any content that accompanies the tool calls
if response.Content != "" && onToolCallContent != nil {
onToolCallContent(response.Content)
@@ -251,8 +251,6 @@ func (a *Agent) GetLoadingMessage() string {
return a.loadingMessage
}
// generateWithCancellationAndStreaming calls the LLM with ESC key cancellation support and streaming callbacks
func (a *Agent) generateWithCancellationAndStreaming(ctx context.Context, messages []*schema.Message, toolInfos []*schema.ToolInfo, streamingCallback StreamingResponseHandler) (*schema.Message, error) {
// Check if streaming is enabled
@@ -271,10 +269,6 @@ func (a *Agent) generateWithCancellationAndStreaming(ctx context.Context, messag
return a.generateWithStreamingFirstAndCallback(ctx, messages, toolInfos, streamingCallback)
}
// generateWithStreamingAndCallback uses streaming for responses without tool calls with real-time callbacks
func (a *Agent) generateWithStreamingAndCallback(ctx context.Context, messages []*schema.Message, toolInfos []*schema.ToolInfo, callback StreamingResponseHandler) (*schema.Message, error) {
// Try streaming first
@@ -324,8 +318,6 @@ func (a *Agent) generateWithStreamingFirstAndCallback(ctx context.Context, messa
return response, nil
}
// generateWithoutStreaming uses the traditional non-streaming approach
func (a *Agent) generateWithoutStreaming(ctx context.Context, messages []*schema.Message, toolInfos []*schema.ToolInfo) (*schema.Message, error) {
// Create a cancellable context for just this LLM call
+41 -5
View File
@@ -21,6 +21,7 @@ func StreamWithCallback(ctx context.Context, reader *schema.StreamReader[*schema
var content strings.Builder
var accumulatedToolCalls map[string]*schema.ToolCall // Track tool calls by ID to handle incremental updates
var streamComplete bool
var finalResponseMeta *schema.ResponseMeta // Accumulate response metadata from all chunks
accumulatedToolCalls = make(map[string]*schema.ToolCall)
@@ -49,6 +50,40 @@ func StreamWithCallback(ctx context.Context, reader *schema.StreamReader[*schema
// Accumulate content from all chunks
content.WriteString(msg.Content)
// Accumulate response metadata - merge from multiple chunks for accuracy
if msg.ResponseMeta != nil {
if finalResponseMeta == nil {
// First metadata we've seen - use as base
finalResponseMeta = &schema.ResponseMeta{}
if msg.ResponseMeta.Usage != nil {
finalResponseMeta.Usage = &schema.TokenUsage{}
}
}
// Merge metadata intelligently to handle Anthropic's streaming behavior
if msg.ResponseMeta.Usage != nil && finalResponseMeta.Usage != nil {
usage := msg.ResponseMeta.Usage
// Take PromptTokens from first chunk that has them (usually non-zero)
if finalResponseMeta.Usage.PromptTokens == 0 && usage.PromptTokens > 0 {
finalResponseMeta.Usage.PromptTokens = usage.PromptTokens
}
// Always take the latest CompletionTokens (accumulates over chunks)
if usage.CompletionTokens > 0 {
finalResponseMeta.Usage.CompletionTokens = usage.CompletionTokens
}
// Calculate TotalTokens from the components
finalResponseMeta.Usage.TotalTokens = finalResponseMeta.Usage.PromptTokens + finalResponseMeta.Usage.CompletionTokens
}
// Preserve other metadata fields from the latest chunk
if msg.ResponseMeta.FinishReason != "" {
finalResponseMeta.FinishReason = msg.ResponseMeta.FinishReason
}
}
// Accumulate tool calls incrementally - Anthropic streams them piece by piece
// NOTE: We don't process these tool calls until EOF is reached
if len(msg.ToolCalls) > 0 {
@@ -101,10 +136,11 @@ func StreamWithCallback(ctx context.Context, reader *schema.StreamReader[*schema
}
}
// Return complete message with all content and final tool calls
// Return complete message with all content, final tool calls, and preserved metadata
return &schema.Message{
Role: schema.Assistant,
Content: content.String(),
ToolCalls: finalToolCalls,
Role: schema.Assistant,
Content: content.String(),
ToolCalls: finalToolCalls,
ResponseMeta: finalResponseMeta, // Preserve usage and other metadata from streaming
}, nil
}
}
+1 -1
View File
@@ -167,4 +167,4 @@ func contains(s, substr string) bool {
}
}
return false
}
}
+29 -14
View File
@@ -47,16 +47,8 @@ func (c *CLI) SetUsageTracker(tracker *UsageTracker) {
// GetPrompt gets user input using the huh library with divider and padding
func (c *CLI) GetPrompt() (string, error) {
// Display usage info if available
if c.usageTracker != nil {
usageInfo := c.usageTracker.RenderUsageInfo()
if usageInfo != "" {
paddedUsage := lipgloss.NewStyle().
PaddingLeft(2).
Render(usageInfo)
fmt.Print(paddedUsage)
}
}
// Usage info is now displayed immediately after responses via DisplayUsageAfterResponse()
// No need to display it here to avoid duplication
// Create an enhanced divider with gradient effect
theme := GetTheme()
@@ -357,11 +349,18 @@ func (c *CLI) UpdateUsageFromResponse(response *schema.Message, inputText string
inputTokens := int(usage.PromptTokens)
outputTokens := int(usage.CompletionTokens)
// Handle cache tokens if available (some providers support this)
cacheReadTokens := 0
cacheWriteTokens := 0
// Validate that the metadata seems reasonable
// If token counts are 0 or seem unrealistic, fall back to estimation
if inputTokens > 0 && outputTokens > 0 {
// Handle cache tokens if available (some providers support this)
cacheReadTokens := 0
cacheWriteTokens := 0
c.usageTracker.UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens)
c.usageTracker.UpdateUsage(inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens)
} else {
// Metadata exists but seems incomplete/unreliable, use estimation
c.usageTracker.EstimateAndUpdateUsage(inputText, response.Content)
}
} else {
// Fallback to estimation if no metadata is available
c.usageTracker.EstimateAndUpdateUsage(inputText, response.Content)
@@ -405,6 +404,22 @@ func (c *CLI) ResetUsageStats() {
c.DisplayInfo("Usage statistics have been reset.")
}
// DisplayUsageAfterResponse prints the usage tracker's rendered summary to
// stdout right after a response has been shown. It is a no-op when no usage
// tracker is attached or when the tracker renders an empty string.
func (c *CLI) DisplayUsageAfterResponse() {
	tracker := c.usageTracker
	if tracker == nil {
		return
	}

	rendered := tracker.RenderUsageInfo()
	if rendered == "" {
		return
	}

	// Two columns of left padding plus one line of top padding around the summary.
	usageStyle := lipgloss.NewStyle().PaddingLeft(2).PaddingTop(1)
	fmt.Print(usageStyle.Render(rendered))
}
// updateSize updates the CLI size based on terminal dimensions
func (c *CLI) updateSize() {
width, height, err := term.GetSize(int(os.Stdout.Fd()))