fix(pkg): transparently handle <think> tags in stream

Move reasoning tag detection from the provider and UI layers into the agent layer. This prevents raw XML tags from leaking into text streams while ensuring structured reasoning events are emitted correctly for all callers.
This commit is contained in:
Ed Zynda
2026-04-03 13:49:12 +03:00
parent 788e3b71fd
commit 35982b41ad
3 changed files with 46 additions and 186 deletions
+2 -127
View File
@@ -10,7 +10,6 @@ import (
"maps"
"net/http"
"os"
"regexp"
"strings"
"time"
@@ -1000,128 +999,6 @@ func createVercelProvider(ctx context.Context, config *ProviderConfig, modelName
return &ProviderResult{Model: model}, nil
}
// thinkTagRegex matches <think>...</think> tags for extracting reasoning content
// from models that wrap thinking in XML-like tags (e.g., Qwen, DeepSeek).
var thinkTagRegex = regexp.MustCompile(`(?s)<think>(.*?)</think>`)
// customExtraContentFunc extracts reasoning from <think> tags in the content field.
// This handles models like Qwen and DeepSeek that return reasoning wrapped in XML tags
// rather than using a separate reasoning_content field.
func customExtraContentFunc(choice openaisdk.ChatCompletionChoice) []fantasy.Content {
var content []fantasy.Content
if choice.Message.Content == "" {
return content
}
// Check for <think> tags in the content
matches := thinkTagRegex.FindStringSubmatch(choice.Message.Content)
if len(matches) > 1 {
// Found reasoning content in <think> tags
reasoning := strings.TrimSpace(matches[1])
if reasoning != "" {
content = append(content, fantasy.ReasoningContent{
Text: reasoning,
})
}
}
return content
}
// customStreamExtraFunc handles streaming responses with <think> tags.
// It extracts reasoning content and emits proper reasoning events.
func customStreamExtraFunc(
chunk openaisdk.ChatCompletionChunk,
yield func(fantasy.StreamPart) bool,
ctx map[string]any,
) (map[string]any, bool) {
if len(chunk.Choices) == 0 {
return ctx, true
}
const reasoningStartedKey = "reasoning_started"
const reasoningBufferKey = "reasoning_buffer"
const inThinkTagKey = "in_think_tag"
reasoningStarted, _ := ctx[reasoningStartedKey].(bool)
inThinkTag, _ := ctx[inThinkTagKey].(bool)
reasoningBuffer, _ := ctx[reasoningBufferKey].(string)
for i, choice := range chunk.Choices {
content := choice.Delta.Content
if content == "" {
continue
}
// Check for <think> tag start
if strings.Contains(content, "<think>") {
inThinkTag = true
ctx[inThinkTagKey] = true
// Emit reasoning start event
if !reasoningStarted {
reasoningStarted = true
ctx[reasoningStartedKey] = true
if !yield(fantasy.StreamPart{
Type: fantasy.StreamPartTypeReasoningStart,
ID: fmt.Sprintf("%d", i),
}) {
return ctx, false
}
}
// Extract content after <think>
parts := strings.SplitN(content, "<think>", 2)
if len(parts) > 1 && parts[1] != "" {
reasoningBuffer += parts[1]
ctx[reasoningBufferKey] = reasoningBuffer
}
continue
}
// Check for </think> tag end
if strings.Contains(content, "</think>") {
inThinkTag = false
ctx[inThinkTagKey] = false
// Extract content before </think>
parts := strings.SplitN(content, "</think>", 2)
if len(parts) > 0 {
reasoningBuffer += parts[0]
}
// Emit the accumulated reasoning
if reasoningBuffer != "" {
if !yield(fantasy.StreamPart{
Type: fantasy.StreamPartTypeReasoningDelta,
ID: fmt.Sprintf("%d", i),
Delta: reasoningBuffer,
}) {
return ctx, false
}
ctx[reasoningBufferKey] = ""
}
// Emit reasoning end
if !yield(fantasy.StreamPart{
Type: fantasy.StreamPartTypeReasoningEnd,
ID: fmt.Sprintf("%d", i),
}) {
return ctx, false
}
continue
}
// Accumulate reasoning content while in think tag
if inThinkTag {
reasoningBuffer += content
ctx[reasoningBufferKey] = reasoningBuffer
}
}
return ctx, true
}
// customToPromptFunc converts prompts to OpenAI format using the default conversion.
func customToPromptFunc(prompt fantasy.Prompt, systemPrompt, user string) ([]openaisdk.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
return openai.DefaultToPrompt(prompt, systemPrompt, user)
@@ -1153,15 +1030,13 @@ func createCustomProvider(ctx context.Context, config *ProviderConfig, modelName
apiKey = "custom"
}
// Use the openai provider directly with custom hooks to handle <think> tags
// from models like Qwen and DeepSeek that wrap reasoning in XML tags.
// <think> tag extraction is handled transparently at the agent layer,
// so no provider-level hooks are needed here.
var opts []openai.Option
opts = append(opts, openai.WithBaseURL(baseURL))
opts = append(opts, openai.WithAPIKey(apiKey))
opts = append(opts, openai.WithName("custom"))
opts = append(opts, openai.WithLanguageModelOptions(
openai.WithLanguageModelExtraContentFunc(customExtraContentFunc),
openai.WithLanguageModelStreamExtraFunc(customStreamExtraFunc),
openai.WithLanguageModelToPromptFunc(customToPromptFunc),
))
+4 -56
View File
@@ -2,7 +2,6 @@ package ui
import (
"fmt"
"regexp"
"strings"
"time"
@@ -14,17 +13,6 @@ import (
"github.com/mark3labs/kit/internal/ui/style"
)
// thinkTagRegex matches ... tags that some models (Qwen, DeepSeek) wrap
// reasoning content in. Used to strip these tags from streaming text content.
// The (?s) flag makes . match newlines.
var thinkTagRegex = regexp.MustCompile(`(?s)` + `` + `think` + `` + `(.*?)` + `` + `/think` + ``)
// thinkTagOpen and thinkTagClose are the opening and closing think tag strings.
const (
thinkTagOpen = "<think>"
thinkTagClose = "</think>"
)
// knightRiderFrames generates a KITT-style scanning animation where a bright
// light bounces back and forth across a row of dots with a trailing glow.
// Colors are derived from the active theme. Used by StreamComponent (TUI
@@ -207,10 +195,6 @@ type StreamComponent struct {
// reasoningDuration holds the total reasoning time, frozen when streaming text begins.
reasoningDuration time.Duration
// inThinkTag tracks whether we're currently inside a section
// from models that wrap reasoning in XML-like tags (Qwen, DeepSeek).
inThinkTag bool
// renderer renders streaming assistant text.
renderer Renderer
@@ -319,9 +303,7 @@ func (s *StreamComponent) GetRenderedContent() string {
// Called before reading content for output or on flush tick.
func (s *StreamComponent) commitPending() {
if s.pendingStream.Len() > 0 {
// Strip ... tags that some models wrap reasoning in
cleanedText := thinkTagRegex.ReplaceAllString(s.pendingStream.String(), "")
s.streamContent.WriteString(cleanedText)
s.streamContent.WriteString(s.pendingStream.String())
s.pendingStream.Reset()
}
if s.pendingReasoning.Len() > 0 {
@@ -422,43 +404,9 @@ func (s *StreamComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
s.reasoningDuration = time.Since(s.reasoningStartTime)
}
// Handle models that wrap reasoning in tags (Qwen, DeepSeek)
// Filter out all content between and tags
content := msg.Content
// Check for opening tag
if strings.Contains(content, thinkTagOpen) {
parts := strings.SplitN(content, thinkTagOpen, 2)
// Content before the tag can be written
if !s.inThinkTag && parts[0] != "" {
s.pendingStream.WriteString(parts[0])
}
s.inThinkTag = true
// Content after the opening tag is reasoning - don't write it
if len(parts) > 1 && parts[1] != "" {
// Check if the same chunk contains the closing tag
if strings.Contains(parts[1], thinkTagClose) {
innerParts := strings.SplitN(parts[1], thinkTagClose, 2)
s.inThinkTag = false
// Content after closing tag can be written
if len(innerParts) > 1 && innerParts[1] != "" {
s.pendingStream.WriteString(innerParts[1])
}
}
}
} else if strings.Contains(content, thinkTagClose) {
// Closing tag found
parts := strings.SplitN(content, thinkTagClose, 2)
s.inThinkTag = false
// Content after closing tag can be written
if len(parts) > 1 && parts[1] != "" {
s.pendingStream.WriteString(parts[1])
}
} else if !s.inThinkTag {
// Normal content, not inside think tags
s.pendingStream.WriteString(content)
}
// else: inside think tag, don't write this content
// <think> tag filtering is handled at the agent layer — chunks here
// are already clean text.
s.pendingStream.WriteString(msg.Content)
if !s.flushPending && s.pendingStream.Len() > 0 {
s.flushPending = true
+40 -3
View File
@@ -1153,9 +1153,46 @@ func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.
func(content string) {
m.events.emit(ToolCallContentEvent{Content: content})
},
func(chunk string) {
m.events.emit(MessageUpdateEvent{Chunk: chunk})
},
// <think> tag filtering: models like Qwen/DeepSeek wrap reasoning inside
// <think>...</think> tags in the regular text stream. We intercept those
// spans here and re-route them as ReasoningDeltaEvent/ReasoningCompleteEvent
// so callers always receive clean, tag-free text and structured reasoning.
func() func(chunk string) {
const (
thinkOpen = "<think>"
thinkClose = "</think>"
)
var inThinkTag bool
return func(chunk string) {
remaining := chunk
for remaining != "" {
if inThinkTag {
i := strings.Index(remaining, thinkClose)
if i == -1 {
m.events.emit(ReasoningDeltaEvent{Delta: remaining})
return
}
if i > 0 {
m.events.emit(ReasoningDeltaEvent{Delta: remaining[:i]})
}
inThinkTag = false
m.events.emit(ReasoningCompleteEvent{})
remaining = remaining[i+len(thinkClose):]
} else {
i := strings.Index(remaining, thinkOpen)
if i == -1 {
m.events.emit(MessageUpdateEvent{Chunk: remaining})
return
}
if i > 0 {
m.events.emit(MessageUpdateEvent{Chunk: remaining[:i]})
}
inThinkTag = true
remaining = remaining[i+len(thinkOpen):]
}
}
}
}(),
func(delta string) {
m.events.emit(ReasoningDeltaEvent{Delta: delta})
},