fix(pkg): transparently handle <think> tags in stream

Move reasoning tag detection from the provider and UI layers into the agent layer. This prevents raw XML tags from leaking into text streams while ensuring structured reasoning events are emitted correctly for all callers.
2026-06-14 03:30:26 +00:00 · 2026-04-03 13:49:12 +03:00
parent 788e3b71fd
commit 35982b41ad
3 changed files with 46 additions and 186 deletions
@@ -10,7 +10,6 @@ import (
 	"maps"
 	"net/http"
 	"os"
-	"regexp"
 	"strings"
 	"time"

@@ -1000,128 +999,6 @@ func createVercelProvider(ctx context.Context, config *ProviderConfig, modelName
 	return &ProviderResult{Model: model}, nil
 }

-// thinkTagRegex matches <think>...</think> tags for extracting reasoning content
-// from models that wrap thinking in XML-like tags (e.g., Qwen, DeepSeek).
-var thinkTagRegex = regexp.MustCompile(`(?s)<think>(.*?)</think>`)
-
-// customExtraContentFunc extracts reasoning from <think> tags in the content field.
-// This handles models like Qwen and DeepSeek that return reasoning wrapped in XML tags
-// rather than using a separate reasoning_content field.
-func customExtraContentFunc(choice openaisdk.ChatCompletionChoice) []fantasy.Content {
-	var content []fantasy.Content
-	if choice.Message.Content == "" {
-		return content
-	}
-
-	// Check for <think> tags in the content
-	matches := thinkTagRegex.FindStringSubmatch(choice.Message.Content)
-	if len(matches) > 1 {
-		// Found reasoning content in <think> tags
-		reasoning := strings.TrimSpace(matches[1])
-		if reasoning != "" {
-			content = append(content, fantasy.ReasoningContent{
-				Text: reasoning,
-			})
-		}
-	}
-
-	return content
-}
-
-// customStreamExtraFunc handles streaming responses with <think> tags.
-// It extracts reasoning content and emits proper reasoning events.
-func customStreamExtraFunc(
-	chunk openaisdk.ChatCompletionChunk,
-	yield func(fantasy.StreamPart) bool,
-	ctx map[string]any,
-) (map[string]any, bool) {
-	if len(chunk.Choices) == 0 {
-		return ctx, true
-	}
-
-	const reasoningStartedKey = "reasoning_started"
-	const reasoningBufferKey = "reasoning_buffer"
-	const inThinkTagKey = "in_think_tag"
-
-	reasoningStarted, _ := ctx[reasoningStartedKey].(bool)
-	inThinkTag, _ := ctx[inThinkTagKey].(bool)
-	reasoningBuffer, _ := ctx[reasoningBufferKey].(string)
-
-	for i, choice := range chunk.Choices {
-		content := choice.Delta.Content
-		if content == "" {
-			continue
-		}
-
-		// Check for <think> tag start
-		if strings.Contains(content, "<think>") {
-			inThinkTag = true
-			ctx[inThinkTagKey] = true
-
-			// Emit reasoning start event
-			if !reasoningStarted {
-				reasoningStarted = true
-				ctx[reasoningStartedKey] = true
-				if !yield(fantasy.StreamPart{
-					Type: fantasy.StreamPartTypeReasoningStart,
-					ID:   fmt.Sprintf("%d", i),
-				}) {
-					return ctx, false
-				}
-			}
-
-			// Extract content after <think>
-			parts := strings.SplitN(content, "<think>", 2)
-			if len(parts) > 1 && parts[1] != "" {
-				reasoningBuffer += parts[1]
-				ctx[reasoningBufferKey] = reasoningBuffer
-			}
-			continue
-		}
-
-		// Check for </think> tag end
-		if strings.Contains(content, "</think>") {
-			inThinkTag = false
-			ctx[inThinkTagKey] = false
-
-			// Extract content before </think>
-			parts := strings.SplitN(content, "</think>", 2)
-			if len(parts) > 0 {
-				reasoningBuffer += parts[0]
-			}
-
-			// Emit the accumulated reasoning
-			if reasoningBuffer != "" {
-				if !yield(fantasy.StreamPart{
-					Type:  fantasy.StreamPartTypeReasoningDelta,
-					ID:    fmt.Sprintf("%d", i),
-					Delta: reasoningBuffer,
-				}) {
-					return ctx, false
-				}
-				ctx[reasoningBufferKey] = ""
-			}
-
-			// Emit reasoning end
-			if !yield(fantasy.StreamPart{
-				Type: fantasy.StreamPartTypeReasoningEnd,
-				ID:   fmt.Sprintf("%d", i),
-			}) {
-				return ctx, false
-			}
-			continue
-		}
-
-		// Accumulate reasoning content while in think tag
-		if inThinkTag {
-			reasoningBuffer += content
-			ctx[reasoningBufferKey] = reasoningBuffer
-		}
-	}
-
-	return ctx, true
-}
-
 // customToPromptFunc converts prompts to OpenAI format using the default conversion.
 func customToPromptFunc(prompt fantasy.Prompt, systemPrompt, user string) ([]openaisdk.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
 	return openai.DefaultToPrompt(prompt, systemPrompt, user)
@@ -1153,15 +1030,13 @@ func createCustomProvider(ctx context.Context, config *ProviderConfig, modelName
 		apiKey = "custom"
 	}

-	// Use the openai provider directly with custom hooks to handle <think> tags
-	// from models like Qwen and DeepSeek that wrap reasoning in XML tags.
+	// <think> tag extraction is handled transparently at the agent layer,
+	// so no provider-level hooks are needed here.
 	var opts []openai.Option
 	opts = append(opts, openai.WithBaseURL(baseURL))
 	opts = append(opts, openai.WithAPIKey(apiKey))
 	opts = append(opts, openai.WithName("custom"))
 	opts = append(opts, openai.WithLanguageModelOptions(
-		openai.WithLanguageModelExtraContentFunc(customExtraContentFunc),
-		openai.WithLanguageModelStreamExtraFunc(customStreamExtraFunc),
 		openai.WithLanguageModelToPromptFunc(customToPromptFunc),
 	))

@@ -2,7 +2,6 @@ package ui

 import (
 	"fmt"
-	"regexp"
 	"strings"
 	"time"

@@ -14,17 +13,6 @@ import (
 	"github.com/mark3labs/kit/internal/ui/style"
 )

-// thinkTagRegex matches  ...  tags that some models (Qwen, DeepSeek) wrap
-// reasoning content in. Used to strip these tags from streaming text content.
-// The (?s) flag makes . match newlines.
-var thinkTagRegex = regexp.MustCompile(`(?s)` + `` + `think` + `` + `(.*?)` + `` + `/think` + ``)
-
-// thinkTagOpen and thinkTagClose are the opening and closing think tag strings.
-const (
-	thinkTagOpen  = "<think>"
-	thinkTagClose = "</think>"
-)
-
 // knightRiderFrames generates a KITT-style scanning animation where a bright
 // light bounces back and forth across a row of dots with a trailing glow.
 // Colors are derived from the active theme. Used by StreamComponent (TUI
@@ -207,10 +195,6 @@ type StreamComponent struct {
 	// reasoningDuration holds the total reasoning time, frozen when streaming text begins.
 	reasoningDuration time.Duration

-	// inThinkTag tracks whether we're currently inside a  section
-	// from models that wrap reasoning in XML-like tags (Qwen, DeepSeek).
-	inThinkTag bool
-
 	// renderer renders streaming assistant text.
 	renderer Renderer

@@ -319,9 +303,7 @@ func (s *StreamComponent) GetRenderedContent() string {
 // Called before reading content for output or on flush tick.
 func (s *StreamComponent) commitPending() {
 	if s.pendingStream.Len() > 0 {
-		// Strip  ...  tags that some models wrap reasoning in
-		cleanedText := thinkTagRegex.ReplaceAllString(s.pendingStream.String(), "")
-		s.streamContent.WriteString(cleanedText)
+		s.streamContent.WriteString(s.pendingStream.String())
 		s.pendingStream.Reset()
 	}
 	if s.pendingReasoning.Len() > 0 {
@@ -422,43 +404,9 @@ func (s *StreamComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			s.reasoningDuration = time.Since(s.reasoningStartTime)
 		}

-		// Handle models that wrap reasoning in  tags (Qwen, DeepSeek)
-		// Filter out all content between  and  tags
-		content := msg.Content
-
-		// Check for opening tag
-		if strings.Contains(content, thinkTagOpen) {
-			parts := strings.SplitN(content, thinkTagOpen, 2)
-			// Content before the tag can be written
-			if !s.inThinkTag && parts[0] != "" {
-				s.pendingStream.WriteString(parts[0])
-			}
-			s.inThinkTag = true
-			// Content after the opening tag is reasoning - don't write it
-			if len(parts) > 1 && parts[1] != "" {
-				// Check if the same chunk contains the closing tag
-				if strings.Contains(parts[1], thinkTagClose) {
-					innerParts := strings.SplitN(parts[1], thinkTagClose, 2)
-					s.inThinkTag = false
-					// Content after closing tag can be written
-					if len(innerParts) > 1 && innerParts[1] != "" {
-						s.pendingStream.WriteString(innerParts[1])
-					}
-				}
-			}
-		} else if strings.Contains(content, thinkTagClose) {
-			// Closing tag found
-			parts := strings.SplitN(content, thinkTagClose, 2)
-			s.inThinkTag = false
-			// Content after closing tag can be written
-			if len(parts) > 1 && parts[1] != "" {
-				s.pendingStream.WriteString(parts[1])
-			}
-		} else if !s.inThinkTag {
-			// Normal content, not inside think tags
-			s.pendingStream.WriteString(content)
-		}
-		// else: inside think tag, don't write this content
+		// <think> tag filtering is handled at the agent layer — chunks here
+		// are already clean text.
+		s.pendingStream.WriteString(msg.Content)

 		if !s.flushPending && s.pendingStream.Len() > 0 {
 			s.flushPending = true
@@ -1153,9 +1153,46 @@ func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.
 		func(content string) {
 			m.events.emit(ToolCallContentEvent{Content: content})
 		},
-		func(chunk string) {
-			m.events.emit(MessageUpdateEvent{Chunk: chunk})
-		},
+		// <think> tag filtering: models like Qwen/DeepSeek wrap reasoning inside
+		// <think>...</think> tags in the regular text stream. We intercept those
+		// spans here and re-route them as ReasoningDeltaEvent/ReasoningCompleteEvent
+		// so callers always receive clean, tag-free text and structured reasoning.
+		func() func(chunk string) {
+			const (
+				thinkOpen  = "<think>"
+				thinkClose = "</think>"
+			)
+			var inThinkTag bool
+			return func(chunk string) {
+				remaining := chunk
+				for remaining != "" {
+					if inThinkTag {
+						i := strings.Index(remaining, thinkClose)
+						if i == -1 {
+							m.events.emit(ReasoningDeltaEvent{Delta: remaining})
+							return
+						}
+						if i > 0 {
+							m.events.emit(ReasoningDeltaEvent{Delta: remaining[:i]})
+						}
+						inThinkTag = false
+						m.events.emit(ReasoningCompleteEvent{})
+						remaining = remaining[i+len(thinkClose):]
+					} else {
+						i := strings.Index(remaining, thinkOpen)
+						if i == -1 {
+							m.events.emit(MessageUpdateEvent{Chunk: remaining})
+							return
+						}
+						if i > 0 {
+							m.events.emit(MessageUpdateEvent{Chunk: remaining[:i]})
+						}
+						inThinkTag = true
+						remaining = remaining[i+len(thinkOpen):]
+					}
+				}
+			}
+		}(),
 		func(delta string) {
 			m.events.emit(ReasoningDeltaEvent{Delta: delta})
 		},