fix(models): add gpt-5.4 reasoning level support with auto-adjustment

Adds 'none' thinking level to support OpenAI gpt-5.4 models which use 'reasoning_effort: none' instead of 'minimal'. Includes validation and auto-adjustment when switching models with incompatible levels.

- Add ThinkingNone constant mapping to ReasoningEffortNone
- Add IsValidThinkingLevelForModel() with gpt-5.4 detection
- Add SuggestThinkingLevelFallback() for level migration
- Auto-adjust thinking level on model switch with user notification
- Update all docs to include 'none' in valid levels

Fixes #11
2026-06-13 19:20:06 +00:00 · 2026-04-21 20:19:00 +03:00
parent c1dee3ceba
commit 3ff701054a
12 changed files with 145 additions and 22 deletions
@@ -126,7 +126,7 @@ model: anthropic/claude-sonnet-latest
 max-tokens: 4096
 temperature: 0.7
 stream: true
-thinking-level: off       # off, minimal, low, medium, high
+thinking-level: off       # off, none, minimal, low, medium, high
 ```

 All of the above keys can also be set programmatically via the SDK
@@ -199,7 +199,7 @@ mcpServers:
 --stop-sequences         Custom stop sequences (comma-separated)
 --frequency-penalty      Penalize frequent tokens 0.0-2.0 (default: 0.0)
 --presence-penalty       Penalize present tokens 0.0-2.0 (default: 0.0)
---thinking-level         Extended thinking level: off, minimal, low, medium, high (default: off)
+--thinking-level         Extended thinking level: off, none, minimal, low, medium, high (default: off)

 # System
 --config                 Config file path (default: ~/.kit.yml)
@@ -548,7 +548,7 @@ host, err := kit.New(ctx, &kit.Options{

    // Generation parameters (override env/config/per-model defaults)
    MaxTokens:        16384,             // 0 = auto-resolve (env → config → per-model → 8192 floor)
-    ThinkingLevel:    "medium",          // "off", "low", "medium", "high"
+    ThinkingLevel:    "medium",          // "off", "none", "minimal", "low", "medium", "high"
    Temperature:      ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
    TopP:             nil,                // nil = leave provider/per-model default
    TopK:             nil,
@@ -304,7 +304,7 @@ func init() {
 	flags.Float32Var(&frequencyPenalty, "frequency-penalty", 0.0, "penalizes tokens based on frequency of appearance (0.0-2.0)")
 	flags.Float32Var(&presencePenalty, "presence-penalty", 0.0, "penalizes tokens based on whether they have appeared (0.0-2.0)")
 	flags.StringSliceVar(&stopSequences, "stop-sequences", nil, "custom stop sequences (comma-separated)")
-	flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, minimal, low, medium, high")
+	flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, none, minimal, low, medium, high")

 	// Ollama-specific parameters
 	flags.Int32Var(&numGPU, "num-gpu-layers", -1, "number of model layers to offload to GPU for Ollama models (-1 for auto-detect)")
@@ -85,6 +85,7 @@ type ThinkingLevel string

 const (
 	ThinkingOff     ThinkingLevel = "off"
+	ThinkingNone    ThinkingLevel = "none"
 	ThinkingMinimal ThinkingLevel = "minimal"
 	ThinkingLow     ThinkingLevel = "low"
 	ThinkingMedium  ThinkingLevel = "medium"
@@ -93,12 +94,14 @@ const (

 // ThinkingLevels returns the ordered list of available thinking levels for cycling.
 func ThinkingLevels() []ThinkingLevel {
-	return []ThinkingLevel{ThinkingOff, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
+	return []ThinkingLevel{ThinkingOff, ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
 }

-// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off".
+// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off". "none" shares the "minimal" budget.
 func thinkingBudgetTokens(level ThinkingLevel) int64 {
 	switch level {
+	case ThinkingNone:
+		return 1024
 	case ThinkingMinimal:
 		return 1024
 	case ThinkingLow:
@@ -117,6 +120,8 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
 	switch level {
 	case ThinkingOff:
 		return "No reasoning"
+	case ThinkingNone:
+		return "Minimal reasoning (OpenAI 'none')"
 	case ThinkingMinimal:
 		return "Very brief reasoning (~1k tokens)"
 	case ThinkingLow:
@@ -133,7 +138,7 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
 // ParseThinkingLevel converts a string to a ThinkingLevel, defaulting to ThinkingOff.
 func ParseThinkingLevel(s string) ThinkingLevel {
 	switch ThinkingLevel(s) {
-	case ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
+	case ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
 		return ThinkingLevel(s)
 	default:
 		return ThinkingOff
@@ -580,6 +585,8 @@ func buildOpenAIProviderOptions(config *ProviderConfig, modelName string) fantas
 // Returns nil for ThinkingOff (use the model's default).
 func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort {
 	switch level {
+	case ThinkingNone:
+		return new(openai.ReasoningEffortNone)
 	case ThinkingMinimal:
 		return new(openai.ReasoningEffortMinimal)
 	case ThinkingLow:
@@ -593,6 +600,56 @@ func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort
 	}
 }

+// IsValidThinkingLevelForModel reports whether level can be used with the
+// model identified by modelName.
+//
+// ThinkingOff is always valid. ThinkingMinimal is rejected for model names
+// matched by usesNoneReasoningEffort, and ThinkingNone is rejected for every
+// other model whose name contains "gpt-5". All remaining combinations are
+// accepted; no check is made that the model supports reasoning at all.
+func IsValidThinkingLevelForModel(level ThinkingLevel, modelName string) bool {
+	switch level {
+	case ThinkingMinimal:
+		// These models take "none" in place of "minimal".
+		return !usesNoneReasoningEffort(modelName)
+	case ThinkingNone:
+		// Other gpt-5 models only take "minimal", not "none". Names that do
+		// not contain "gpt-5" are not restricted here.
+		return !strings.Contains(modelName, "gpt-5") || usesNoneReasoningEffort(modelName)
+	default:
+		return true
+	}
+}
+
+// usesNoneReasoningEffort reports whether modelName contains one of the
+// substrings identifying OpenAI models that use "none" instead of "minimal":
+// "gpt-5.4", "gpt-5-pro" or "gpt-5-chat". Matching is by literal substring
+// only, so other gpt-5.x versions are not detected.
+//
+// Both the "minimal" and the "none" checks in IsValidThinkingLevelForModel
+// consult this single list so that they stay exact complements for gpt-5
+// models; SuggestThinkingLevelFallback depends on that to swap one level for
+// the other.
+func usesNoneReasoningEffort(modelName string) bool {
+	for _, marker := range []string{"gpt-5.4", "gpt-5-pro", "gpt-5-chat"} {
+		if strings.Contains(modelName, marker) {
+			return true
+		}
+	}
+	return false
+}
+
+// SuggestThinkingLevelFallback picks a replacement for level when modelName
+// rejects it: "minimal" is swapped for "none" and "none" for "minimal"
+// (~same token budget). It returns ThinkingOff when level is already valid
+// for the model or when no substitute is defined for it.
+func SuggestThinkingLevelFallback(level ThinkingLevel, modelName string) ThinkingLevel {
+	if IsValidThinkingLevelForModel(level, modelName) {
+		// Nothing to replace.
+		return ThinkingOff
+	}
+	switch level {
+	case ThinkingMinimal:
+		return ThinkingNone
+	case ThinkingNone:
+		return ThinkingMinimal
+	default:
+		return ThinkingOff
+	}
+}
+
 // buildAnthropicProviderOptions returns fantasy.ProviderOptions configured for
 // Anthropic models with extended thinking. When thinking is enabled, it sets
 // SendReasoning to true and configures the thinking budget. For thinking-off
@@ -84,7 +84,7 @@ var SlashCommands = []SlashCommand{
 	},
 	{
 		Name:        "/thinking",
-		Description: "Set thinking/reasoning level (off, minimal, low, medium, high)",
+		Description: "Set thinking/reasoning level (off, none, minimal, low, medium, high)",
 		Category:    "System",
 		Aliases:     []string{"/think"},
 		Complete: func(prefix string) []string {
@@ -1142,6 +1142,31 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
 		m.state = stateInput
 		if m.setModel != nil {
 			previousModel := m.providerName + "/" + m.modelName
+
+			// Check if thinking level needs adjustment for the new model.
+			// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
+			if m.thinkingLevel != "" && m.thinkingLevel != "off" {
+				parts := strings.SplitN(msg.ModelString, "/", 2)
+				if len(parts) == 2 {
+					modelName := parts[1]
+					currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
+					if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
+						fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
+						if fallback != models.ThinkingOff {
+							m.printSystemMessage(fmt.Sprintf(
+								"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
+								modelName, currentLevel, fallback,
+							))
+							m.thinkingLevel = string(fallback)
+							if m.setThinkingLevel != nil {
+								_ = m.setThinkingLevel(string(fallback))
+							}
+							go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
+						}
+					}
+				}
+			}
+
 			if err := m.setModel(msg.ModelString); err != nil {
 				m.printSystemMessage(fmt.Sprintf("Failed to switch model: %v", err))
 			} else {
@@ -2656,7 +2681,7 @@ func (m *AppModel) renderStatusBar() string {

 // cycleThinkingLevel advances to the next thinking level and applies it.
 func (m *AppModel) cycleThinkingLevel() {
-	levels := []string{"off", "minimal", "low", "medium", "high"}
+	levels := []string{"off", "none", "minimal", "low", "medium", "high"}
 	current := m.thinkingLevel
 	if current == "" {
 		current = "off"
@@ -3841,6 +3866,30 @@ func (m *AppModel) handleModelCommand(args string) tea.Cmd {
 		return nil
 	}

+	// Check if thinking level needs adjustment for the new model.
+	// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
+	if m.thinkingLevel != "" && m.thinkingLevel != "off" {
+		parts := strings.SplitN(args, "/", 2)
+		if len(parts) == 2 {
+			modelName := parts[1]
+			currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
+			if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
+				fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
+				if fallback != models.ThinkingOff {
+					m.printSystemMessage(fmt.Sprintf(
+						"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
+						modelName, currentLevel, fallback,
+					))
+					m.thinkingLevel = string(fallback)
+					if m.setThinkingLevel != nil {
+						_ = m.setThinkingLevel(string(fallback))
+					}
+					go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
+				}
+			}
+		}
+	}
+
 	// Direct model switch with the provided model string.
 	previousModel := m.providerName + "/" + m.modelName
 	if err := m.setModel(args); err != nil {
@@ -3945,7 +3994,7 @@ func (m *AppModel) handleThinkingCommand(args string) tea.Cmd {
 	// Parse and validate the level.
 	level := models.ParseThinkingLevel(args)
 	if string(level) != strings.ToLower(args) {
-		m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, minimal, low, medium, high", args))
+		m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, none, minimal, low, medium, high", args))
 		return nil
 	}

@@ -543,6 +543,23 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error {
 	systemPrompt, _ := config.LoadSystemPrompt(viper.GetString("system-prompt"))
 	thinkingLevel := models.ParseThinkingLevel(viper.GetString("thinking-level"))

+	// Validate and adjust thinking level for the target model.
+	// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
+	if thinkingLevel != models.ThinkingOff {
+		parts := strings.SplitN(modelString, "/", 2)
+		if len(parts) == 2 {
+			modelName := parts[1]
+			if !models.IsValidThinkingLevelForModel(thinkingLevel, modelName) {
+				fallback := models.SuggestThinkingLevelFallback(thinkingLevel, modelName)
+				if fallback != models.ThinkingOff {
+					// Adjust the thinking level in viper so the change persists.
+					viper.Set("thinking-level", string(fallback))
+					thinkingLevel = fallback
+				}
+			}
+		}
+	}
+
 	// With message-level caching, thinking and caching can work together.
 	// No need to disable caching when thinking is enabled.
 	cfg := &models.ProviderConfig{
@@ -866,10 +883,10 @@ type Options struct {
 	MaxTokens int

 	// ThinkingLevel sets the reasoning effort for models that support
-	// extended thinking. Valid values: "off", "low", "medium", "high".
-	// "" = let the precedence chain resolve a level (env → config →
-	// per-model → "off"). Use [Kit.SetThinkingLevel] to change at
-	// runtime.
+	// extended thinking. Valid values: "off", "none", "minimal", "low",
+	// "medium", "high". "" = let the precedence chain resolve a level
+	// (env → config → per-model → "off"). Use [Kit.SetThinkingLevel]
+	// to change at runtime.
 	ThinkingLevel string

 	// Temperature controls sampling randomness (typically 0.0–2.0).
@@ -85,7 +85,7 @@ host, err := kit.New(ctx, &kit.Options{
    // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
    // 8192 floor for MaxTokens, provider defaults for samplers).
    MaxTokens:        16384,             // 0 = auto-resolve; non-zero suppresses right-sizing
-    ThinkingLevel:    "medium",          // "off", "low", "medium", "high" ("" = default)
+    ThinkingLevel:    "medium",          // "off", "none", "minimal", "low", "medium", "high" ("" = default)
    Temperature:      ptrFloat32(0.2),   // pointer so explicit 0.0 != unset
    TopP:             nil,                // nil = leave provider/per-model default
    TopK:             nil,                // nil = leave provider/per-model default
@@ -154,7 +154,7 @@ func ptrFloat32(v float32) *float32 { return &v }
 | Field | Type | Empty/nil means | Notes |
 |-------|------|-----------------|-------|
 | `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
-| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
+| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"` |
 | `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
 | `TopP` | `*float32` | Leave provider/per-model default | |
 | `TopK` | `*int32` | Leave provider/per-model default | |
@@ -66,7 +66,7 @@ These commands are available inside the Kit TUI during an interactive session:
 | `/servers` | Show connected MCP servers |
 | `/model [name]` | Switch model or open model selector |
 | `/theme [name]` | Switch color theme or list available themes |
-| `/thinking [level]` | Set thinking level (off, minimal, low, medium, high) |
+| `/thinking [level]` | Set thinking level (off, none, minimal, low, medium, high) |
 | `/compact [focus]` | Summarize older messages to free context |
 | `/clear` | Clear conversation |
 | `/clear-queue` | Clear queued messages |
@@ -59,7 +59,7 @@ These flags control Kit's behavior. When a prompt is passed as a positional argu
 | `--stop-sequences` | — | — | Custom stop sequences (comma-separated) |
 | `--frequency-penalty` | — | `0.0` | Penalize frequent tokens (0.0–2.0) |
 | `--presence-penalty` | — | `0.0` | Penalize present tokens (0.0–2.0) |
-| `--thinking-level` | — | `off` | Extended thinking level: off, minimal, low, medium, high |
+| `--thinking-level` | — | `off` | Extended thinking level: off, none, minimal, low, medium, high |

 ## System

@@ -37,7 +37,7 @@ stream: true
 | `compact` | bool | `false` | Enable compact output mode |
 | `system-prompt` | string | — | System prompt text or file path |
 | `max-steps` | int | `0` | Maximum agent steps (0 = unlimited) |
-| `thinking-level` | string | `off` | Extended thinking: off, minimal, low, medium, high |
+| `thinking-level` | string | `off` | Extended thinking: off, none, minimal, low, medium, high |
 | `provider-api-key` | string | — | API key for the provider |
 | `provider-url` | string | — | Base URL for provider API |
 | `tls-skip-verify` | bool | `false` | Skip TLS certificate verification |
@@ -24,7 +24,7 @@ host, err := kit.New(ctx, &kit.Options{

    // Generation parameters (override env/config/per-model defaults)
    MaxTokens:        16384,              // 0 = auto-resolve; non-zero suppresses right-sizing
-    ThinkingLevel:    "medium",           // "off", "low", "medium", "high"
+    ThinkingLevel:    "medium",           // "off", "none", "minimal", "low", "medium", "high"
    Temperature:      ptrFloat32(0.2),    // pointer so explicit 0.0 != unset
    TopP:             nil,                 // nil = provider/per-model default
    TopK:             nil,
@@ -107,7 +107,7 @@ defaults for samplers).
 | Field | Type | Default | Description |
 |-------|------|---------|-------------|
 | `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). |
-| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to config/env/per-model/`"off"`. |
+| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`. `""` falls through to config/env/per-model/`"off"`. |
 | `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". |
 | `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. |
 | `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. |
@@ -115,7 +115,7 @@ entirely in-code via `Options`, without touching `.kit.yml` or `viper.Set()`:
 host, _ := kit.New(ctx, &kit.Options{
    Model:          "anthropic/claude-sonnet-4-5-20250929",
    MaxTokens:      16384,             // 0 = auto-resolve (env → config → per-model → floor)
-    ThinkingLevel:  "high",            // "off" | "low" | "medium" | "high"
+    ThinkingLevel:  "high",            // "off" | "none" | "minimal" | "low" | "medium" | "high"
    Temperature:    ptrFloat32(0.2),   // nil = provider/per-model default
    ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state
    ProviderURL:    "https://proxy.internal/v1",