feat: add thinking model support with configurable reasoning levels

Add extended thinking/reasoning support for Anthropic and OpenAI models:

- ThinkingLevel type (off/minimal/low/medium/high) with token budgets
- Stream reasoning deltas via OnReasoningDelta through the SDK→TUI event pipeline
- Render thinking blocks in StreamComponent (muted italic, collapsible)
- ctrl+t toggles thinking visibility, shift+tab cycles thinking level
- /thinking slash command with tab-completion for level names
- --thinking-level CLI flag and config file support
- Map ThinkingLevel to OpenAI ReasoningEffort for the Responses API
- Auto-bump Anthropic max_tokens when the thinking budget exceeds it
- Fix ResponseCompleteEvent prematurely resetting stream in streaming mode
- Status bar displays current thinking level
2026-06-14 03:30:26 +00:00 · 2026-03-07 21:27:46 +03:00
parent 24ea2c94e3
commit f3ea18ae3a
14 changed files with 460 additions and 25 deletions
@@ -52,6 +52,7 @@ var (
 	topP          float32
 	topK          int32
 	stopSequences []string
+	thinkingLevel string

 	// Ollama-specific parameters
 	numGPU  int32
@@ -247,6 +248,7 @@ func init() {
 	flags.Float32Var(&topP, "top-p", 0.95, "controls diversity via nucleus sampling (0.0-1.0)")
 	flags.Int32Var(&topK, "top-k", 40, "controls diversity by limiting top K tokens to sample from")
 	flags.StringSliceVar(&stopSequences, "stop-sequences", nil, "custom stop sequences (comma-separated)")
+	flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, minimal, low, medium, high")

 	// Ollama-specific parameters
 	flags.Int32Var(&numGPU, "num-gpu-layers", -1, "number of model layers to offload to GPU for Ollama models (-1 for auto-detect)")
@@ -269,6 +271,7 @@ func init() {
 	_ = viper.BindPFlag("top-p", rootCmd.PersistentFlags().Lookup("top-p"))
 	_ = viper.BindPFlag("top-k", rootCmd.PersistentFlags().Lookup("top-k"))
 	_ = viper.BindPFlag("stop-sequences", rootCmd.PersistentFlags().Lookup("stop-sequences"))
+	_ = viper.BindPFlag("thinking-level", rootCmd.PersistentFlags().Lookup("thinking-level"))
 	_ = viper.BindPFlag("num-gpu-layers", rootCmd.PersistentFlags().Lookup("num-gpu-layers"))
 	_ = viper.BindPFlag("main-gpu", rootCmd.PersistentFlags().Lookup("main-gpu"))
 	_ = viper.BindPFlag("tls-skip-verify", rootCmd.PersistentFlags().Lookup("tls-skip-verify"))
@@ -980,9 +983,14 @@ func runNormalMode(ctx context.Context) error {
 		kitInstance.EmitModelChange(newModel, previousModel, source)
 	}

+	// Build thinking level callback.
+	setThinkingLevelForUI := func(level string) error {
+		return kitInstance.SetThinkingLevel(context.Background(), level)
+	}
+
 	// Check if running in non-interactive mode
 	if positionalPrompt != "" {
-		return runNonInteractiveModeApp(ctx, appInstance, cli, positionalPrompt, quietFlag, jsonFlag, noExitFlag, modelName, parsedProvider, kitInstance.GetLoadingMessage(), serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModelForUI, emitModelChangeForUI)
+		return runNonInteractiveModeApp(ctx, appInstance, cli, positionalPrompt, quietFlag, jsonFlag, noExitFlag, modelName, parsedProvider, kitInstance.GetLoadingMessage(), serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModelForUI, emitModelChangeForUI, kitInstance.IsReasoningModel(), kitInstance.GetThinkingLevel(), setThinkingLevelForUI)
 	}

 	// Quiet mode is not allowed in interactive mode
@@ -990,7 +998,7 @@ func runNormalMode(ctx context.Context) error {
 		return fmt.Errorf("--quiet requires a prompt")
 	}

-	return runInteractiveModeBubbleTea(ctx, appInstance, modelName, parsedProvider, kitInstance.GetLoadingMessage(), serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModelForUI, emitModelChangeForUI)
+	return runInteractiveModeBubbleTea(ctx, appInstance, modelName, parsedProvider, kitInstance.GetLoadingMessage(), serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModelForUI, emitModelChangeForUI, kitInstance.IsReasoningModel(), kitInstance.GetThinkingLevel(), setThinkingLevelForUI)
 }

 // runNonInteractiveModeApp executes a single prompt via the app layer and exits,
@@ -1003,7 +1011,7 @@ func runNormalMode(ctx context.Context) error {
 //
 // When --no-exit is set, after the prompt completes the interactive BubbleTea
 // TUI is started so the user can continue the conversation.
-func runNonInteractiveModeApp(ctx context.Context, appInstance *app.App, cli *ui.CLI, prompt string, quiet, jsonOutput, noExit bool, modelName, providerName, loadingMessage string, serverNames, toolNames []string, mcpToolCount, extensionToolCount int, usageTracker *ui.UsageTracker, extCommands []ui.ExtensionCommand, contextPaths []string, skillItems []ui.SkillItem, getWidgets func(string) []ui.WidgetData, getHeader, getFooter func() *ui.WidgetData, getToolRenderer func(string) *ui.ToolRendererData, getEditorInterceptor func() *ui.EditorInterceptor, getUIVisibility func() *ui.UIVisibility, getStatusBarEntries func() []ui.StatusBarEntryData, emitBeforeFork func(string, bool, string) (bool, string), emitBeforeSessionSwitch func(string) (bool, string), getGlobalShortcuts func() map[string]func(), getExtensionCommands func() []ui.ExtensionCommand, setModel func(string) error, emitModelChange func(string, string, string)) error {
+func runNonInteractiveModeApp(ctx context.Context, appInstance *app.App, cli *ui.CLI, prompt string, quiet, jsonOutput, noExit bool, modelName, providerName, loadingMessage string, serverNames, toolNames []string, mcpToolCount, extensionToolCount int, usageTracker *ui.UsageTracker, extCommands []ui.ExtensionCommand, contextPaths []string, skillItems []ui.SkillItem, getWidgets func(string) []ui.WidgetData, getHeader, getFooter func() *ui.WidgetData, getToolRenderer func(string) *ui.ToolRendererData, getEditorInterceptor func() *ui.EditorInterceptor, getUIVisibility func() *ui.UIVisibility, getStatusBarEntries func() []ui.StatusBarEntryData, emitBeforeFork func(string, bool, string) (bool, string), emitBeforeSessionSwitch func(string) (bool, string), getGlobalShortcuts func() map[string]func(), getExtensionCommands func() []ui.ExtensionCommand, setModel func(string) error, emitModelChange func(string, string, string), isReasoningModel bool, thinkingLevel string, setThinkingLevel func(string) error) error {
 	// Expand @file references in the prompt before sending to the agent.
 	if cwd, err := os.Getwd(); err == nil {
 		prompt = ui.ProcessFileAttachments(prompt, cwd)
@@ -1046,7 +1054,7 @@ func runNonInteractiveModeApp(ctx context.Context, appInstance *app.App, cli *ui

 	// If --no-exit was requested, hand off to the interactive TUI.
 	if noExit {
-		return runInteractiveModeBubbleTea(ctx, appInstance, modelName, providerName, loadingMessage, serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModel, emitModelChange)
+		return runInteractiveModeBubbleTea(ctx, appInstance, modelName, providerName, loadingMessage, serverNames, toolNames, mcpToolCount, extensionToolCount, usageTracker, extCommands, contextPaths, skillItems, getWidgets, getHeader, getFooter, getToolRenderer, getEditorInterceptor, getUIVisibility, getStatusBarEntries, emitBeforeFork, emitBeforeSessionSwitch, getGlobalShortcuts, getExtensionCommands, setModel, emitModelChange, isReasoningModel, thinkingLevel, setThinkingLevel)
 	}

 	return nil
@@ -1140,7 +1148,7 @@ func writeJSONError(err error) {
 //  4. Calls program.Run() which blocks until the user quits (Ctrl+C or /quit).
 //
 // SetupCLI is not used for interactive mode; the TUI (AppModel) handles its own rendering.
-func runInteractiveModeBubbleTea(_ context.Context, appInstance *app.App, modelName, providerName, loadingMessage string, serverNames, toolNames []string, mcpToolCount, extensionToolCount int, usageTracker *ui.UsageTracker, extCommands []ui.ExtensionCommand, contextPaths []string, skillItems []ui.SkillItem, getWidgets func(string) []ui.WidgetData, getHeader, getFooter func() *ui.WidgetData, getToolRenderer func(string) *ui.ToolRendererData, getEditorInterceptor func() *ui.EditorInterceptor, getUIVisibility func() *ui.UIVisibility, getStatusBarEntries func() []ui.StatusBarEntryData, emitBeforeFork func(string, bool, string) (bool, string), emitBeforeSessionSwitch func(string) (bool, string), getGlobalShortcuts func() map[string]func(), getExtensionCommands func() []ui.ExtensionCommand, setModel func(string) error, emitModelChange func(string, string, string)) error {
+func runInteractiveModeBubbleTea(_ context.Context, appInstance *app.App, modelName, providerName, loadingMessage string, serverNames, toolNames []string, mcpToolCount, extensionToolCount int, usageTracker *ui.UsageTracker, extCommands []ui.ExtensionCommand, contextPaths []string, skillItems []ui.SkillItem, getWidgets func(string) []ui.WidgetData, getHeader, getFooter func() *ui.WidgetData, getToolRenderer func(string) *ui.ToolRendererData, getEditorInterceptor func() *ui.EditorInterceptor, getUIVisibility func() *ui.UIVisibility, getStatusBarEntries func() []ui.StatusBarEntryData, emitBeforeFork func(string, bool, string) (bool, string), emitBeforeSessionSwitch func(string) (bool, string), getGlobalShortcuts func() map[string]func(), getExtensionCommands func() []ui.ExtensionCommand, setModel func(string) error, emitModelChange func(string, string, string), isReasoningModel bool, thinkingLevel string, setThinkingLevel func(string) error) error {
 	// Determine terminal size; fall back gracefully.
 	termWidth, termHeight, err := term.GetSize(int(os.Stdout.Fd()))
 	if err != nil || termWidth == 0 {
@@ -1178,6 +1186,9 @@ func runInteractiveModeBubbleTea(_ context.Context, appInstance *app.App, modelN
 		GetExtensionCommands:    getExtensionCommands,
 		SetModel:                setModel,
 		EmitModelChange:         emitModelChange,
+		ThinkingLevel:           thinkingLevel,
+		IsReasoningModel:        isReasoningModel,
+		SetThinkingLevel:        setThinkingLevel,
 	})

 	// Print startup info to stdout before Bubble Tea takes over the screen.
@@ -58,6 +58,9 @@ type StreamingResponseHandler func(content string)
 // ToolCallContentHandler is a function type for handling content that accompanies tool calls.
 type ToolCallContentHandler func(content string)

+// ReasoningDeltaHandler is a function type for handling streaming reasoning/thinking deltas.
+type ReasoningDeltaHandler func(delta string)
+
 // Agent represents an AI agent with core tool integration using the fantasy library.
 // Core tools (bash, read, write, edit, grep, find, ls) are registered as direct
 // fantasy.AgentTool implementations — no MCP layer, no serialization overhead.
@@ -211,7 +214,7 @@ func (a *Agent) GenerateWithLoop(ctx context.Context, messages []fantasy.Message
 	onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
 ) (*GenerateWithLoopResult, error) {
 	return a.GenerateWithLoopAndStreaming(ctx, messages, onToolCall, onToolExecution, onToolResult,
-		onResponse, onToolCallContent, nil)
+		onResponse, onToolCallContent, nil, nil)
 }

 // GenerateWithLoopAndStreaming processes messages using the fantasy agent with streaming and callbacks.
@@ -221,6 +224,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 	onToolCall ToolCallHandler, onToolExecution ToolExecutionHandler, onToolResult ToolResultHandler,
 	onResponse ResponseHandler, onToolCallContent ToolCallContentHandler,
 	onStreamingResponse StreamingResponseHandler,
+	onReasoningDelta ReasoningDeltaHandler,
 ) (*GenerateWithLoopResult, error) {

 	// Fantasy requires the current user input as Prompt, with prior messages as history.
@@ -236,7 +240,7 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 	// Stream is required to observe tool execution in real time. The non-streaming
 	// Generate path is reserved for the simple case with no callbacks at all.
 	hasCallbacks := onToolCall != nil || onToolExecution != nil || onToolResult != nil ||
-		onToolCallContent != nil || onStreamingResponse != nil
+		onToolCallContent != nil || onStreamingResponse != nil || onReasoningDelta != nil

 	if a.streamingEnabled || hasCallbacks {
 		// Use fantasy's streaming agent
@@ -244,6 +248,17 @@ func (a *Agent) GenerateWithLoopAndStreaming(ctx context.Context, messages []fan
 			Prompt:   prompt,
 			Messages: history,

+			// Reasoning/thinking streaming callback
+			OnReasoningDelta: func(id, delta string) error {
+				if ctx.Err() != nil {
+					return ctx.Err()
+				}
+				if onReasoningDelta != nil {
+					onReasoningDelta(delta)
+				}
+				return nil
+			},
+
 			// Text streaming callback
 			OnTextDelta: func(id, text string) error {
 				if ctx.Err() != nil {
@@ -522,6 +522,8 @@ func (a *App) subscribeSDKEvents(sendFn func(tea.Msg)) func() {
 			sendFn(ResponseCompleteEvent{Content: ev.Content})
 		case kit.MessageUpdateEvent:
 			sendFn(StreamChunkEvent{Content: ev.Chunk})
+		case kit.ReasoningDeltaEvent:
+			sendFn(ReasoningChunkEvent{Delta: ev.Delta})
 		}
 	}))

@@ -9,6 +9,13 @@ type StreamChunkEvent struct {
 	Content string
 }

+// ReasoningChunkEvent is sent when a streaming reasoning/thinking delta arrives
+// from the LLM. Thinking content is rendered separately from regular text.
+type ReasoningChunkEvent struct {
+	// Delta is the incremental reasoning text from the streaming response.
+	Delta string
+}
+
 // ToolCallStartedEvent is sent when a tool call has been parsed and is about to execute.
 // It carries the tool name and its arguments for display purposes.
 type ToolCallStartedEvent struct {
@@ -165,6 +165,9 @@ type Config struct {
 	TopK          *int32   `json:"top-k,omitempty" yaml:"top-k,omitempty"`
 	StopSequences []string `json:"stop-sequences,omitempty" yaml:"stop-sequences,omitempty"`

+	// Thinking / extended reasoning
+	ThinkingLevel string `json:"thinking-level,omitempty" yaml:"thinking-level,omitempty"`
+
 	// TLS configuration
 	TLSSkipVerify bool `json:"tls-skip-verify,omitempty" yaml:"tls-skip-verify,omitempty"`
 }
@@ -79,6 +79,7 @@ func BuildProviderConfig() (*models.ProviderConfig, string, error) {
 		NumGPU:         &numGPU,
 		MainGPU:        &mainGPU,
 		TLSSkipVerify:  viper.GetBool("tls-skip-verify"),
+		ThinkingLevel:  models.ParseThinkingLevel(viper.GetString("thinking-level")),
 	}

 	return cfg, systemPrompt, nil
@@ -57,6 +57,66 @@ func resolveModelAlias(provider, modelName string) string {
 	return modelName
 }

+// ThinkingLevel controls extended thinking / reasoning budget for supported models.
+type ThinkingLevel string
+
+const (
+	ThinkingOff     ThinkingLevel = "off"
+	ThinkingMinimal ThinkingLevel = "minimal"
+	ThinkingLow     ThinkingLevel = "low"
+	ThinkingMedium  ThinkingLevel = "medium"
+	ThinkingHigh    ThinkingLevel = "high"
+)
+
+// ThinkingLevels returns the ordered list of available thinking levels for cycling.
+func ThinkingLevels() []ThinkingLevel {
+	return []ThinkingLevel{ThinkingOff, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
+}
+
+// ThinkingBudgetTokens reports the reasoning token budget for a thinking level; "off" and unknown levels get 0.
+func ThinkingBudgetTokens(level ThinkingLevel) int64 {
+	var budget int64 // remains 0 unless the level matches a case below
+	switch level {
+	case ThinkingMinimal:
+		budget = 1024
+	case ThinkingLow:
+		budget = 4096
+	case ThinkingMedium:
+		budget = 10240
+	case ThinkingHigh:
+		budget = 20480
+	}
+	return budget
+}
+
+// ThinkingLevelDescription gives the human-readable summary of a thinking
+// level. ThinkingOff and any level this function does not recognize share the
+// same "No reasoning" text.
+func ThinkingLevelDescription(level ThinkingLevel) string {
+	description := "No reasoning" // used for ThinkingOff and unrecognized levels
+	switch level {
+	case ThinkingMinimal:
+		description = "Very brief reasoning (~1k tokens)"
+	case ThinkingLow:
+		description = "Light reasoning (~4k tokens)"
+	case ThinkingMedium:
+		description = "Moderate reasoning (~10k tokens)"
+	case ThinkingHigh:
+		description = "Deep reasoning (~20k tokens)"
+	}
+	return description
+}
+
+// ParseThinkingLevel maps s onto a known ThinkingLevel by exact, case-sensitive match; any other input yields ThinkingOff.
+func ParseThinkingLevel(s string) ThinkingLevel {
+	for _, known := range ThinkingLevels() {
+		if known == ThinkingLevel(s) {
+			return known
+		}
+	}
+	return ThinkingOff
+}
+
 // ProviderConfig holds configuration for creating LLM providers.
 type ProviderConfig struct {
 	ModelString    string
@@ -71,6 +131,7 @@ type ProviderConfig struct {
 	NumGPU         *int32
 	MainGPU        *int32
 	TLSSkipVerify  bool
+	ThinkingLevel  ThinkingLevel
 }

 // ProviderResult contains the result of provider creation.
@@ -320,7 +381,7 @@ func createAutoRoutedOpenAIProvider(ctx context.Context, config *ProviderConfig,
 		return nil, fmt.Errorf("failed to create %s model: %w", info.Name, err)
 	}

-	providerOpts := buildOpenAIProviderOptions(modelName)
+	providerOpts := buildOpenAIProviderOptions(config, modelName)

 	return &ProviderResult{Model: model, ProviderOptions: providerOpts}, nil
 }
@@ -355,10 +416,10 @@ func validateModelConfig(config *ProviderConfig, modelInfo *ModelInfo) {

 // buildOpenAIProviderOptions returns fantasy.ProviderOptions configured for
 // OpenAI Responses API models. For reasoning models it sets reasoning_summary
-// to "auto" and includes encrypted reasoning content — matching the behaviour
-// of crush's coordinator. For non-responses or non-reasoning models the
+// to "auto", includes encrypted reasoning content, and maps the ThinkingLevel
+// to an OpenAI ReasoningEffort. For non-responses or non-reasoning models the
 // returned map is nil (no extra options needed).
-func buildOpenAIProviderOptions(modelName string) fantasy.ProviderOptions {
+func buildOpenAIProviderOptions(config *ProviderConfig, modelName string) fantasy.ProviderOptions {
 	if !openai.IsResponsesModel(modelName) {
 		return nil
 	}
@@ -371,6 +432,12 @@ func buildOpenAIProviderOptions(modelName string) fantasy.ProviderOptions {
 				openai.IncludeReasoningEncryptedContent,
 			},
 		}
+
+		// Map ThinkingLevel to OpenAI ReasoningEffort.
+		if effort := thinkingLevelToReasoningEffort(config.ThinkingLevel); effort != nil {
+			opts.ReasoningEffort = effort
+		}
+
 		return fantasy.ProviderOptions{
 			openai.Name: opts,
 		}
@@ -379,6 +446,57 @@ func buildOpenAIProviderOptions(modelName string) fantasy.ProviderOptions {
 	return nil
 }

+// thinkingLevelToReasoningEffort maps a ThinkingLevel to an OpenAI ReasoningEffort.
+// Returns nil for ThinkingOff and for any unrecognized level (no explicit effort is requested).
+func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort {
+	switch level {
+	case ThinkingMinimal:
+		return openai.ReasoningEffortOption(openai.ReasoningEffortMinimal)
+	case ThinkingLow:
+		return openai.ReasoningEffortOption(openai.ReasoningEffortLow)
+	case ThinkingMedium:
+		return openai.ReasoningEffortOption(openai.ReasoningEffortMedium)
+	case ThinkingHigh:
+		return openai.ReasoningEffortOption(openai.ReasoningEffortHigh)
+	default:
+		return nil
+	}
+}
+
+// buildAnthropicProviderOptions returns fantasy.ProviderOptions configured for
+// Anthropic models with extended thinking. When thinking is enabled, it sets
+// SendReasoning to true and configures the thinking budget. When the level is
+// empty, off, or has a zero budget it returns nil; modelName is not inspected.
+//
+// Anthropic requires max_tokens > thinking.budget_tokens. If the configured
+// MaxTokens is too low, config.MaxTokens is raised in place to budget + 4096
+// to leave room for the actual response.
+func buildAnthropicProviderOptions(config *ProviderConfig, modelName string) fantasy.ProviderOptions {
+	if config.ThinkingLevel == "" || config.ThinkingLevel == ThinkingOff {
+		return nil
+	}
+
+	budget := ThinkingBudgetTokens(config.ThinkingLevel)
+	if budget == 0 {
+		return nil
+	}
+
+	// Ensure MaxTokens exceeds the thinking budget (Anthropic requirement).
+	minRequired := int(budget) + 4096
+	if config.MaxTokens < minRequired {
+		config.MaxTokens = minRequired
+	}
+
+	sendReasoning := true
+	opts := &anthropic.ProviderOptions{
+		SendReasoning: &sendReasoning,
+		Thinking: &anthropic.ThinkingProviderOption{
+			BudgetTokens: budget,
+		},
+	}
+	return anthropic.NewProviderOptions(opts)
+}
+
 func createAnthropicProvider(ctx context.Context, config *ProviderConfig, modelName string) (*ProviderResult, error) {
 	apiKey, source, err := auth.GetAnthropicAPIKey(config.ProviderAPIKey)
 	if err != nil {
@@ -415,7 +533,10 @@ func createAnthropicProvider(ctx context.Context, config *ProviderConfig, modelN
 		return nil, fmt.Errorf("failed to create Anthropic model: %w", err)
 	}

-	return &ProviderResult{Model: model}, nil
+	// Build provider options for extended thinking (reasoning budget).
+	providerOpts := buildAnthropicProviderOptions(config, modelName)
+
+	return &ProviderResult{Model: model, ProviderOptions: providerOpts}, nil
 }

 func createVertexAnthropicProvider(ctx context.Context, config *ProviderConfig, modelName string) (*ProviderResult, error) {
@@ -487,7 +608,7 @@ func createOpenAIProvider(ctx context.Context, config *ProviderConfig, modelName
 	}

 	// Build provider options for OpenAI Responses API reasoning models.
-	providerOpts := buildOpenAIProviderOptions(modelName)
+	providerOpts := buildOpenAIProviderOptions(config, modelName)

 	return &ProviderResult{Model: model, ProviderOptions: providerOpts}, nil
 }
@@ -1,6 +1,11 @@
 package ui

-import "slices"
+import (
+	"slices"
+	"strings"
+
+	"github.com/mark3labs/kit/internal/models"
+)

 // SlashCommand represents a user-invokable slash command with its metadata.
 // Commands can have multiple aliases and are organized by category for better
@@ -72,6 +77,23 @@ var SlashCommands = []SlashCommand{
 		Category:    "System",
 		Aliases:     []string{"/m"},
 	},
+	{
+		Name:        "/thinking",
+		Description: "Set thinking/reasoning level (off, minimal, low, medium, high)",
+		Category:    "System",
+		Aliases:     []string{"/think"},
+		Complete: func(prefix string) []string {
+			levels := models.ThinkingLevels()
+			var matches []string
+			for _, l := range levels {
+				s := string(l)
+				if prefix == "" || strings.HasPrefix(s, strings.ToLower(prefix)) {
+					matches = append(matches, s)
+				}
+			}
+			return matches
+		},
+	},
 	{
 		Name:        "/quit",
 		Description: "Exit the application",
@@ -13,6 +13,7 @@ import (
 	"charm.land/lipgloss/v2"
 	"github.com/mark3labs/kit/internal/app"
 	"github.com/mark3labs/kit/internal/core"
+	"github.com/mark3labs/kit/internal/models"
 	"github.com/mark3labs/kit/internal/session"
 )

@@ -321,6 +322,13 @@ type AppModelOptions struct {
 	// successful model switch. Parameters are (newModel, previousModel, source).
 	// May be nil if extensions are not loaded.
 	EmitModelChange func(newModel, previousModel, source string)
+
+	// ThinkingLevel is the initial thinking level (e.g. "off", "medium").
+	ThinkingLevel string
+	// IsReasoningModel is true when the current model supports reasoning.
+	IsReasoningModel bool
+	// SetThinkingLevel changes the thinking level on the agent/provider.
+	SetThinkingLevel func(level string) error
 }

 // AppModel is the root Bubble Tea model for the interactive TUI. It owns the
@@ -442,6 +450,16 @@ type AppModel struct {
 	// Returns (cancelled, reason). May be nil if no extensions are loaded.
 	emitBeforeSessionSwitch func(reason string) (bool, string)

+	// thinkingLevel is the current extended thinking level.
+	thinkingLevel string
+	// thinkingVisible controls whether reasoning blocks are shown or collapsed.
+	thinkingVisible bool
+	// isReasoningModel is true when the current model supports reasoning.
+	isReasoningModel bool
+	// setThinkingLevel is a callback to change the thinking level on the agent.
+	// It takes the new level string and returns an error if the change fails.
+	setThinkingLevel func(level string) error
+
 	// getGlobalShortcuts returns extension-registered keyboard shortcuts.
 	// May be nil if no extensions are loaded.
 	getGlobalShortcuts func() map[string]func()
@@ -519,6 +537,10 @@ type streamComponentIface interface {
 	// Returns "" when the spinner is not active. The parent renders this in the
 	// status bar so the spinner never changes the view height.
 	SpinnerView() string
+	// SetThinkingVisible sets whether reasoning blocks are shown or collapsed.
+	SetThinkingVisible(visible bool)
+	// HasReasoning returns true if any reasoning content has been accumulated.
+	HasReasoning() bool
 }

 // --------------------------------------------------------------------------
@@ -585,6 +607,10 @@ func NewAppModel(appCtrl AppController, opts AppModelOptions) *AppModel {
 	m.getExtensionCommands = opts.GetExtensionCommands
 	m.setModel = opts.SetModel
 	m.emitModelChange = opts.EmitModelChange
+	m.thinkingLevel = opts.ThinkingLevel
+	m.thinkingVisible = true // default to showing thinking blocks
+	m.isReasoningModel = opts.IsReasoningModel
+	m.setThinkingLevel = opts.SetThinkingLevel

 	// Store context/skills metadata and tool counts for startup display.
 	m.contextPaths = opts.ContextPaths
@@ -613,6 +639,7 @@ func NewAppModel(appCtrl AppController, opts AppModelOptions) *AppModel {
 	}

 	m.stream = NewStreamComponent(opts.CompactMode, width, opts.ModelName)
+	m.stream.SetThinkingVisible(m.thinkingVisible)

 	// Propagate initial height distribution to children.
 	m.distributeHeight()
@@ -871,6 +898,23 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			}
 		}

+		// Thinking keybindings — only when the model supports reasoning.
+		if m.isReasoningModel {
+			switch msg.String() {
+			case "ctrl+t":
+				// Toggle thinking block visibility.
+				m.thinkingVisible = !m.thinkingVisible
+				if m.stream != nil {
+					m.stream.SetThinkingVisible(m.thinkingVisible)
+				}
+				return m, tea.Batch(cmds...)
+			case "shift+tab":
+				// Cycle thinking level.
+				m.cycleThinkingLevel()
+				return m, tea.Batch(cmds...)
+			}
+		}
+
 		// Route to tree selector when active.
 		if m.state == stateTreeSelector && m.treeSelector != nil {
 			updated, cmd := m.treeSelector.Update(msg)
@@ -984,6 +1028,11 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 						cmds = append(cmds, cmd)
 					}
 					return m, tea.Batch(cmds...)
+				case "/thinking":
+					if cmd := m.handleThinkingCommand(strings.TrimSpace(args)); cmd != nil {
+						cmds = append(cmds, cmd)
+					}
+					return m, tea.Batch(cmds...)
 				}
 			}
 		}
@@ -1052,6 +1101,12 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 			cmds = append(cmds, cmd)
 		}

+	case app.ReasoningChunkEvent:
+		if m.stream != nil {
+			_, cmd := m.stream.Update(msg)
+			cmds = append(cmds, cmd)
+		}
+
 	case app.StreamChunkEvent:
 		if m.stream != nil {
 			_, cmd := m.stream.Update(msg)
@@ -1087,13 +1142,17 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		// double-printing.

 	case app.ResponseCompleteEvent:
-		// Non-streaming mode: this carries the full response text (StreamChunkEvents
-		// never fire). Print it immediately.
-		if msg.Content != "" {
+		// This event fires for both streaming and non-streaming paths.
+		// In streaming mode, the content was already delivered via StreamChunkEvents
+		// and is sitting in the stream component (possibly with reasoning). Don't
+		// print or reset — flushStreamContent() handles it on the next step.
+		// In non-streaming mode (no stream content accumulated), print the text.
+		hasStreamContent := m.stream != nil && m.stream.GetRenderedContent() != ""
+		if !hasStreamContent && msg.Content != "" {
 			cmds = append(cmds, m.printAssistantMessage(msg.Content))
-		}
-		if m.stream != nil {
-			m.stream.Reset() // stop spinner
+			if m.stream != nil {
+				m.stream.Reset()
+			}
 		}

 	case app.MessageCreatedEvent:
@@ -1448,8 +1507,14 @@ func (m *AppModel) renderStatusBar() string {
 		leftSide = m.stream.SpinnerView()
 	}

-	// Middle: extension status bar entries (sorted by priority).
+	// Middle: thinking level (when reasoning model) + extension status bar entries.
 	var middleParts []string
+	if m.isReasoningModel && m.thinkingLevel != "" && m.thinkingLevel != "off" {
+		thinkingLabel := "Thinking: " + m.thinkingLevel
+		middleParts = append(middleParts, lipgloss.NewStyle().
+			Foreground(theme.Secondary).
+			Render(thinkingLabel))
+	}
 	if m.getStatusBarEntries != nil {
 		entries := m.getStatusBarEntries()
 		for _, e := range entries {
@@ -1493,6 +1558,35 @@ func (m *AppModel) renderStatusBar() string {
 	return leftSide + middleSide + strings.Repeat(" ", gap) + rightSide
 }

+// cycleThinkingLevel advances to the next level in models.ThinkingLevels(), wrapping around, and applies it.
+func (m *AppModel) cycleThinkingLevel() {
+	levels := models.ThinkingLevels()
+	current := m.thinkingLevel
+	if current == "" {
+		current = string(models.ThinkingOff)
+	}
+
+	// Find current index and advance to next; an unrecognized level counts as index 0.
+	idx := 0
+	for i, l := range levels {
+		if string(l) == current {
+			idx = i
+			break
+		}
+	}
+	next := string(levels[(idx+1)%len(levels)])
+	m.thinkingLevel = next
+
+	// Apply the change to the agent/provider.
+	if m.setThinkingLevel != nil {
+		// Run in a goroutine so provider recreation cannot block the event loop.
+		// NOTE(review): the error is dropped, so a failed switch is not shown.
+		go func() {
+			_ = m.setThinkingLevel(next)
+		}()
+	}
+}
+
 // renderSeparator renders the separator line with an optional queue count badge.
 func (m *AppModel) renderSeparator() string {
 	theme := GetTheme()
@@ -1678,6 +1772,8 @@ func (m *AppModel) handleSlashCommand(sc *SlashCommand) tea.Cmd {
 		return m.printResetUsage()
 	case "/model":
 		return m.handleModelCommand("")
+	case "/thinking":
+		return m.handleThinkingCommand("")
 	case "/compact":
 		return m.handleCompactCommand("")
 	case "/clear":
@@ -2150,6 +2246,49 @@ func (m *AppModel) handleModelCommand(args string) tea.Cmd {
 	return m.printSystemMessage(fmt.Sprintf("Switched to %s", args))
 }

+// --------------------------------------------------------------------------
+// Thinking command handler
+// --------------------------------------------------------------------------
+
+// handleThinkingCommand changes or displays the current thinking/reasoning level.
+// With no arguments, it shows the current level. With a level argument (off,
+// minimal, low, medium, high; matched case-insensitively) it switches to it.
+func (m *AppModel) handleThinkingCommand(args string) tea.Cmd {
+	if !m.isReasoningModel {
+		return m.printSystemMessage("Current model does not support thinking/reasoning.")
+	}
+
+	if args == "" {
+		// Show current level with descriptions.
+		var lines []string
+		levels := models.ThinkingLevels()
+		for _, l := range levels {
+			marker := "  "
+			if string(l) == m.thinkingLevel {
+				marker = "▸ "
+			}
+			lines = append(lines, fmt.Sprintf("%s%s — %s", marker, l, models.ThinkingLevelDescription(l)))
+		}
+		header := fmt.Sprintf("Current thinking level: %s\n\nAvailable levels:", m.thinkingLevel)
+		return m.printSystemMessage(header + "\n" + strings.Join(lines, "\n"))
+	}
+
+	// Parse and validate the level; lower-case first so input like "HIGH" is accepted.
+	level := models.ParseThinkingLevel(strings.ToLower(args))
+	if string(level) != strings.ToLower(args) {
+		return m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, minimal, low, medium, high", args))
+	}
+
+	// Apply the change.
+	m.thinkingLevel = string(level)
+	if m.setThinkingLevel != nil {
+		go func() {
+			_ = m.setThinkingLevel(string(level))
+		}()
+	}
+	return m.printSystemMessage(fmt.Sprintf("Thinking level set to: %s — %s", level, models.ThinkingLevelDescription(level)))
+}
+
 // --------------------------------------------------------------------------
 // Tree session command handlers
 // --------------------------------------------------------------------------
@@ -83,6 +83,8 @@ func (s *stubStreamComponent) Reset()                     { s.resetCalled++; s.r
 func (s *stubStreamComponent) SetHeight(h int)            { s.height = h }
 func (s *stubStreamComponent) GetRenderedContent() string { return s.renderedContent }
 func (s *stubStreamComponent) SpinnerView() string        { return "" }
+func (s *stubStreamComponent) SetThinkingVisible(bool)    {}               // no-op: the stub ignores the visibility flag
+func (s *stubStreamComponent) HasReasoning() bool         { return false } // the stub always reports no reasoning content

 // stubInputComponent satisfies inputComponentIface without rendering anything.
 type stubInputComponent struct {
@@ -121,6 +121,12 @@ type StreamComponent struct {
 	// streamContent accumulates all streaming text chunks.
 	streamContent strings.Builder

+	// reasoningContent accumulates reasoning/thinking text chunks.
+	reasoningContent strings.Builder
+
+	// thinkingVisible controls whether reasoning blocks are shown or collapsed.
+	thinkingVisible bool
+
 	// messageRenderer renders assistant messages in standard mode.
 	messageRenderer *MessageRenderer

@@ -177,6 +183,7 @@ func (s *StreamComponent) Reset() {
 	s.spinnerFrame = 0
 	s.spinnerMsg = ""
 	s.streamContent.Reset()
+	s.reasoningContent.Reset()
 	s.timestamp = time.Time{}
 }

@@ -184,11 +191,22 @@ func (s *StreamComponent) Reset() {
 // streaming text. Returns empty string if no text has been accumulated. Used by
 // the parent AppModel to flush content via tea.Println() before resetting.
 func (s *StreamComponent) GetRenderedContent() string {
+	var sections []string
+
+	// Reasoning, when any was accumulated, is rendered first so it ends up
+	// above the answer text in the joined output.
+	if reasoning := s.reasoningContent.String(); reasoning != "" {
+		sections = append(sections, s.renderReasoningBlock(reasoning))
+	}
+
 	text := s.streamContent.String()
-	if text == "" {
+	if text != "" {
+		sections = append(sections, s.renderStreamingText(text))
+	}
+
+	// Neither reasoning nor streaming text has been accumulated.
+	if len(sections) == 0 {
 		return ""
 	}
-	return s.renderStreamingText(text)
+	return strings.Join(sections, "\n") // one newline between the reasoning block and the text
 }

 // --------------------------------------------------------------------------
@@ -228,8 +246,17 @@ func (s *StreamComponent) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 				s.timestamp = time.Now()
 			}
 			return s, streamSpinnerTickCmd()
+		} else if !msg.Show && s.spinning {
+			s.spinning = false
 		}

+	case app.ReasoningChunkEvent:
+		s.phase = streamPhaseActive
+		if s.timestamp.IsZero() {
+			s.timestamp = time.Now()
+		}
+		s.reasoningContent.WriteString(msg.Delta)
+
 	case app.StreamChunkEvent:
 		s.phase = streamPhaseActive
 		if s.timestamp.IsZero() {
@@ -271,14 +298,25 @@ func (s *StreamComponent) render() string {
 		return ""
 	}

+	var sections []string
+
+	// Render reasoning/thinking block above the main text if present.
+	if reasoning := s.reasoningContent.String(); reasoning != "" {
+		sections = append(sections, s.renderReasoningBlock(reasoning))
+	}
+
 	// Render streaming text only. The spinner is rendered in the status bar
 	// by the parent so it never changes the stream region height.
 	text := s.streamContent.String()
-	if text == "" {
+	if text != "" {
+		sections = append(sections, s.renderStreamingText(text))
+	}
+
+	if len(sections) == 0 {
 		return ""
 	}

-	content := s.renderStreamingText(text)
+	content := strings.Join(sections, "\n")

 	// Clamp to height if constrained: keep the last h lines so the most
 	// recent output is always visible.
@@ -293,6 +331,44 @@ func (s *StreamComponent) render() string {
 	return content
 }

+// renderReasoningBlock produces the display form of the accumulated reasoning
+// text. While thinking is collapsed only a "Thinking..." placeholder is
+// returned. While it is visible the full text is returned with its width set
+// to s.width minus a small margin. Both forms use the theme's muted
+// foreground colour in italics.
+func (s *StreamComponent) renderReasoningBlock(reasoning string) string {
+	// The placeholder and the full text share one base style.
+	muted := lipgloss.NewStyle().
+		Foreground(GetTheme().Muted).
+		Italic(true)
+
+	if s.thinkingVisible {
+		// Leave a 4-column margin, but never go below 20 columns.
+		wrapAt := s.width - 4
+		if wrapAt < 20 {
+			wrapAt = 20
+		}
+		return muted.Width(wrapAt).Render(reasoning)
+	}
+
+	// Collapsed: show only the label.
+	return muted.Render("Thinking...")
+}
+
+// SetThinkingVisible sets whether reasoning blocks are shown or collapsed.
+// It only stores the flag that renderReasoningBlock consults: true selects
+// the full reasoning text, false selects the "Thinking..." placeholder.
+func (s *StreamComponent) SetThinkingVisible(visible bool) {
+	s.thinkingVisible = visible
+}
+
+// HasReasoning returns true if any reasoning content has been accumulated,
+// i.e. the reasoningContent buffer is non-empty. Reset clears that buffer,
+// after which this reports false again.
+func (s *StreamComponent) HasReasoning() bool {
+	return s.reasoningContent.Len() > 0
+}
+
 // SpinnerView returns the rendered spinner line for the parent to embed in the
 // status bar. Returns "" when the spinner is not active.
 func (s *StreamComponent) SpinnerView() string {
@@ -45,6 +45,7 @@ func setSDKDefaults() {
 	viper.SetDefault("top-p", 0.95)
 	viper.SetDefault("top-k", 40)
 	viper.SetDefault("stream", true)
+	viper.SetDefault("thinking-level", "off")
 	viper.SetDefault("num-gpu-layers", -1)
 	viper.SetDefault("main-gpu", 0)
 }
@@ -34,6 +34,8 @@ const (
 	EventResponse EventType = "response"
 	// EventCompaction fires after a successful compaction.
 	EventCompaction EventType = "compaction"
+	// EventReasoningDelta fires for each streaming reasoning/thinking chunk.
+	EventReasoningDelta EventType = "reasoning_delta"
 )

 // ---------------------------------------------------------------------------
@@ -81,6 +83,14 @@ type MessageUpdateEvent struct {
 // EventType implements Event.
 func (e MessageUpdateEvent) EventType() EventType { return EventMessageUpdate }

+// ReasoningDeltaEvent fires for each streaming reasoning/thinking chunk.
+type ReasoningDeltaEvent struct {
+	// Delta is the reasoning text carried by this single chunk.
+	Delta string
+}
+
+// EventType implements Event. It always returns EventReasoningDelta.
+func (e ReasoningDeltaEvent) EventType() EventType { return EventReasoningDelta }
+
 // MessageEndEvent fires when the assistant message is complete.
 type MessageEndEvent struct {
 	Content string
@@ -485,6 +485,7 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error {
 		ProviderURL:    viper.GetString("provider-url"),
 		MaxTokens:      viper.GetInt("max-tokens"),
 		TLSSkipVerify:  viper.GetBool("tls-skip-verify"),
+		ThinkingLevel:  models.ParseThinkingLevel(viper.GetString("thinking-level")),
 	}
 	temperature := float32(viper.GetFloat64("temperature"))
 	config.Temperature = &temperature
@@ -1198,6 +1199,9 @@ func (m *Kit) generate(ctx context.Context, messages []fantasy.Message) (*agent.
 		func(chunk string) {
 			m.events.emit(MessageUpdateEvent{Chunk: chunk})
 		},
+		func(delta string) {
+			m.events.emit(ReasoningDeltaEvent{Delta: delta})
+		},
 	)
 }

@@ -1488,6 +1492,27 @@ func (m *Kit) GetModelInfo() *ModelInfo {
 	return LookupModel(provider, modelID)
 }

+// IsReasoningModel reports whether the current model is flagged as supporting
+// extended thinking / reasoning. It is false when no model info is available.
+func (m *Kit) IsReasoningModel() bool {
+	if info := m.GetModelInfo(); info != nil {
+		return info.Reasoning
+	}
+	return false
+}
+
+// GetThinkingLevel returns the current thinking level. The value is read
+// directly from the "thinking-level" viper key, i.e. it is the configured
+// level rather than something queried from the agent.
+func (m *Kit) GetThinkingLevel() string {
+	return viper.GetString("thinking-level")
+}
+
+// SetThinkingLevel changes the thinking level and recreates the agent with
+// the new thinking budget. Returns an error if provider recreation fails; in
+// that case the previously configured level is restored so GetThinkingLevel
+// does not report a level that was never applied.
+func (m *Kit) SetThinkingLevel(ctx context.Context, level string) error {
+	previous := viper.GetString("thinking-level")
+	viper.Set("thinking-level", level)
+
+	// Recreate the agent by re-running SetModel with the same model string.
+	// SetModel reads "thinking-level" back from viper while building its
+	// provider config, which is how the new level takes effect.
+	if err := m.SetModel(ctx, m.modelString); err != nil {
+		// Roll back so the stored config stays in step with the agent.
+		viper.Set("thinking-level", previous)
+		return err
+	}
+	return nil
+}
+
 // GetTools returns all tools available to the agent (core + MCP + extensions).
 func (m *Kit) GetTools() []Tool {
 	return m.agent.GetTools()