diff --git a/internal/message/content.go b/internal/message/content.go
index b119592e..4ef0bf7f 100644
--- a/internal/message/content.go
+++ b/internal/message/content.go
@@ -4,12 +4,18 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"regexp"
 	"strings"
 	"time"
 
 	"charm.land/fantasy"
 )
 
+// thinkTagRegex matches <think>...</think> blocks that some models (Qwen, DeepSeek)
+// wrap reasoning content in. Used to strip these blocks from text content.
+// The (?s) flag makes . match newlines; (.*?) is lazy so each block ends at its own tag.
+var thinkTagRegex = regexp.MustCompile(`(?s)<think>(.*?)</think>`)
+
 // sanitizeToolCallID ensures the ID matches Anthropic's required pattern:
 // ^[a-zA-Z0-9_-]+$ (alphanumeric, underscores, and hyphens only).
 // Invalid characters are replaced with underscores.
@@ -443,7 +449,11 @@ func FromFantasyMessage(msg fantasy.Message) Message {
 		switch p := part.(type) {
 		case fantasy.TextPart:
 			if p.Text != "" {
-				m.Parts = append(m.Parts, TextContent{Text: p.Text})
+				// Strip <think>...</think> blocks that some models wrap reasoning in
+				cleanedText := thinkTagRegex.ReplaceAllString(p.Text, "")
+				if cleanedText != "" {
+					m.Parts = append(m.Parts, TextContent{Text: cleanedText})
+				}
 			}
 		case fantasy.ToolCallPart:
 			m.Parts = append(m.Parts, ToolCall{
diff --git a/internal/models/providers.go b/internal/models/providers.go
index 8707d711..4d2f91f2 100644
--- a/internal/models/providers.go
+++ b/internal/models/providers.go
@@ -10,6 +10,7 @@ import (
 	"maps"
 	"net/http"
 	"os"
+	"regexp"
 	"strings"
 	"time"
 
@@ -22,6 +23,7 @@ import (
 	"charm.land/fantasy/providers/openaicompat"
 	"charm.land/fantasy/providers/openrouter"
 	"charm.land/fantasy/providers/vercel"
+	openaisdk "github.com/charmbracelet/openai-go"
 
 	"github.com/mark3labs/kit/internal/auth"
 	"github.com/mark3labs/kit/internal/ui/progress"
@@ -998,6 +1000,133 @@ func createVercelProvider(ctx context.Context, config *ProviderConfig, modelName
 	return &ProviderResult{Model: model}, nil
 }
 
+// thinkTagRegex matches <think>...</think> tags for extracting reasoning content
+// from models that wrap thinking in XML-like tags (e.g., Qwen, DeepSeek).
+var thinkTagRegex = regexp.MustCompile(`(?s)<think>(.*?)</think>`)
+
+// customExtraContentFunc extracts reasoning from <think> tags in the content field.
+// This handles models like Qwen and DeepSeek that return reasoning wrapped in XML tags
+// rather than using a separate reasoning_content field.
+//
+// Only the first <think>...</think> block of choice.Message.Content is considered.
+// Its inner text is trimmed of surrounding whitespace and returned as a single
+// fantasy.ReasoningContent. The result is nil when the content holds no complete
+// block or when the block contains only whitespace.
+func customExtraContentFunc(choice openaisdk.ChatCompletionChoice) []fantasy.Content {
+	// FindStringSubmatch yields nil without a match (including for empty content);
+	// otherwise index 1 is the text captured between the tags.
+	matches := thinkTagRegex.FindStringSubmatch(choice.Message.Content)
+	if len(matches) < 2 {
+		return nil
+	}
+
+	reasoning := strings.TrimSpace(matches[1])
+	if reasoning == "" {
+		return nil
+	}
+
+	return []fantasy.Content{fantasy.ReasoningContent{Text: reasoning}}
+}
+
+// customStreamExtraFunc handles streaming responses with <think> tags.
+// It extracts reasoning content and emits proper reasoning events.
+//
+// Parser state is carried between calls in ctx. Reasoning is buffered until the
+// closing tag arrives, then emitted as one delta followed by an end event.
+// Tags split across two deltas are not recognized.
+func customStreamExtraFunc(
+	chunk openaisdk.ChatCompletionChunk,
+	yield func(fantasy.StreamPart) bool,
+	ctx map[string]any,
+) (map[string]any, bool) {
+	if len(chunk.Choices) == 0 {
+		return ctx, true
+	}
+	// State is written into ctx below, and writing to a nil map panics.
+	if ctx == nil {
+		ctx = make(map[string]any)
+	}
+
+	const (
+		reasoningStartedKey = "reasoning_started"
+		reasoningBufferKey  = "reasoning_buffer"
+		inThinkTagKey       = "in_think_tag"
+		openTag             = "<think>"
+		closeTag            = "</think>"
+	)
+
+	reasoningStarted, _ := ctx[reasoningStartedKey].(bool)
+	inThinkTag, _ := ctx[inThinkTagKey].(bool)
+	reasoningBuffer, _ := ctx[reasoningBufferKey].(string)
+
+	// save writes the parser state back so the next chunk resumes from it.
+	save := func() {
+		ctx[reasoningStartedKey] = reasoningStarted
+		ctx[inThinkTagKey] = inThinkTag
+		ctx[reasoningBufferKey] = reasoningBuffer
+	}
+
+	for i, choice := range chunk.Choices {
+		id := fmt.Sprintf("%d", i)
+		// A delta may hold several tags, so consume it piece by piece.
+		for rest := choice.Delta.Content; rest != ""; {
+			if !inThinkTag {
+				// Text outside a block (stray closing tags included) is skipped.
+				_, after, found := strings.Cut(rest, openTag)
+				if !found {
+					break
+				}
+				inThinkTag, rest = true, after
+				if !reasoningStarted {
+					reasoningStarted = true
+					if !yield(fantasy.StreamPart{Type: fantasy.StreamPartTypeReasoningStart, ID: id}) {
+						save()
+						return ctx, false
+					}
+				}
+				continue
+			}
+
+			// Inside a block: buffer everything up to the closing tag.
+			before, after, found := strings.Cut(rest, closeTag)
+			reasoningBuffer += before
+			if !found {
+				break
+			}
+			inThinkTag, rest = false, after
+
+			// The block is complete: flush the buffered reasoning as one delta.
+			if reasoningBuffer != "" {
+				delta := reasoningBuffer
+				reasoningBuffer = ""
+				if !yield(fantasy.StreamPart{
+					Type:  fantasy.StreamPartTypeReasoningDelta,
+					ID:    id,
+					Delta: delta,
+				}) {
+					save()
+					return ctx, false
+				}
+			}
+
+			// Reset the flag so a later block emits its own start event.
+			reasoningStarted = false
+			if !yield(fantasy.StreamPart{Type: fantasy.StreamPartTypeReasoningEnd, ID: id}) {
+				save()
+				return ctx, false
+			}
+		}
+	}
+
+	save()
+	return ctx, true
+}
+
+// customToPromptFunc converts prompts to OpenAI format using the default conversion.
+func customToPromptFunc(prompt fantasy.Prompt, systemPrompt, user string) ([]openaisdk.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
+	return openai.DefaultToPrompt(prompt, systemPrompt, user)
+}
+
 func createCustomProvider(ctx context.Context, config *ProviderConfig, modelName string) (*ProviderResult, error) {
 	if config.ProviderURL == "" {
 		return nil, fmt.Errorf("custom provider requires --provider-url")
@@ -1012,16 +1141,23 @@ func createCustomProvider(ctx context.Context, config *ProviderConfig, modelName
 		apiKey = "custom"
 	}
 
-	var opts []openaicompat.Option
-	opts = append(opts, openaicompat.WithBaseURL(config.ProviderURL))
-	opts = append(opts, openaicompat.WithAPIKey(apiKey))
-	opts = append(opts, openaicompat.WithName("custom"))
+	// Use the openai provider directly with custom hooks to handle <think> tags
+	// from models like Qwen and DeepSeek that wrap reasoning in XML tags.
+	var opts []openai.Option
+	opts = append(opts, openai.WithBaseURL(config.ProviderURL))
+	opts = append(opts, openai.WithAPIKey(apiKey))
+	opts = append(opts, openai.WithName("custom"))
+	opts = append(opts, openai.WithLanguageModelOptions(
+		openai.WithLanguageModelExtraContentFunc(customExtraContentFunc),
+		openai.WithLanguageModelStreamExtraFunc(customStreamExtraFunc),
+		openai.WithLanguageModelToPromptFunc(customToPromptFunc),
+	))
 
 	if config.TLSSkipVerify {
-		opts = append(opts, openaicompat.WithHTTPClient(createHTTPClientWithTLSConfig(true)))
+		opts = append(opts, openai.WithHTTPClient(createHTTPClientWithTLSConfig(true)))
 	}
 
-	p, err := openaicompat.New(opts...)
+	p, err := openai.New(opts...)
if err != nil {
 		return nil, fmt.Errorf("failed to create custom provider: %w", err)
 	}
diff --git a/internal/ui/stream.go b/internal/ui/stream.go
index 668e6ed9..5e2186b6 100644
--- a/internal/ui/stream.go
+++ b/internal/ui/stream.go
@@ -2,6 +2,7 @@ package ui
 
 import (
 	"fmt"
+	"regexp"
 	"strings"
 	"time"
 
@@ -11,6 +12,17 @@ import (
 	"github.com/mark3labs/kit/internal/app"
 )
 
+// thinkTagRegex matches <think>...</think> blocks that some models (Qwen, DeepSeek)
+// wrap reasoning content in. Used to strip these blocks from streaming text content.
+// The (?s) flag makes . match newlines; (.*?) is lazy so each block ends at its own tag.
+var thinkTagRegex = regexp.MustCompile(`(?s)<think>(.*?)</think>`)
+
+// thinkTagOpen and thinkTagClose are the opening and closing think tag strings.
+const (
+	thinkTagOpen  = "<think>"
+	thinkTagClose = "</think>"
+)
+
 // knightRiderFrames generates a KITT-style scanning animation where a bright
 // light bounces back and forth across a row of dots with a trailing glow.
 // Colors are derived from the active theme. Used by StreamComponent (TUI
@@ -202,6 +214,10 @@ type StreamComponent struct {
 	// reasoningDuration holds the total reasoning time, frozen when streaming text begins.
 	reasoningDuration time.Duration
 
+	// inThinkTag tracks whether we're currently inside a <think> section
+	// from models that wrap reasoning in XML-like tags (Qwen, DeepSeek).
+	inThinkTag bool
+
 	// renderer renders streaming assistant text in either compact or standard mode.
 	renderer Renderer
 
@@ -314,7 +330,9 @@ func (s *StreamComponent) GetRenderedContent() string {
 // Called before reading content for scrollback output or on flush tick.
 func (s *StreamComponent) commitPending() {
 	if s.pendingStream.Len() > 0 {
-		s.streamContent.WriteString(s.pendingStream.String())
+		// Strip <think>...</think> blocks that some models wrap reasoning in
+		cleanedText := thinkTagRegex.ReplaceAllString(s.pendingStream.String(), "")
+		s.streamContent.WriteString(cleanedText)
 		s.pendingStream.Reset()
 		s.renderDirty = true
 	}
@@ -408,8 +426,46 @@ func (s *StreamComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		if s.reasoningDuration == 0 && !s.reasoningStartTime.IsZero() {
 			s.reasoningDuration = time.Since(s.reasoningStartTime)
 		}
-		s.pendingStream.WriteString(msg.Content)
-		if !s.flushPending {
+
+		// Handle models that wrap reasoning in <think> tags (Qwen, DeepSeek):
+		// everything between <think> and </think> is dropped from the visible
+		// stream. s.inThinkTag carries the open/closed state across chunks, and
+		// the loop lets one chunk contain any number of tags. Tags split across
+		// two chunks are not recognized.
+		for rest := msg.Content; rest != ""; {
+			if s.inThinkTag {
+				// Inside a block: discard text up to and including the closing tag.
+				_, after, found := strings.Cut(rest, thinkTagClose)
+				if !found {
+					break
+				}
+				s.inThinkTag = false
+				rest = after
+				continue
+			}
+
+			before, after, found := strings.Cut(rest, thinkTagOpen)
+			if found {
+				// Text ahead of the opening tag is normal output.
+				s.pendingStream.WriteString(before)
+				s.inThinkTag = true
+				rest = after
+				continue
+			}
+
+			// A closing tag with no opening tag: drop the text leading up to it
+			// and keep what follows.
+			if _, tail, ok := strings.Cut(rest, thinkTagClose); ok {
+				rest = tail
+				continue
+			}
+
+			// Plain text with no tags at all.
+			s.pendingStream.WriteString(rest)
+			break
+		}
+
+		if !s.flushPending && s.pendingStream.Len() > 0 {
 			s.flushPending = true
 			return s, streamFlushTickCmd(s.flushGeneration)
 		}