From 3ff701054a85cd914255e0d595be0239e28c7ee1 Mon Sep 17 00:00:00 2001 From: Ed Zynda Date: Tue, 21 Apr 2026 20:19:00 +0300 Subject: [PATCH] fix(models): add gpt-5.4 reasoning level support with auto-adjustment Adds 'none' thinking level to support OpenAI gpt-5.4 models which use 'reasoning_effort: none' instead of 'minimal'. Includes validation and auto-adjustment when switching models with incompatible levels. - Add ThinkingNone constant mapping to ReasoningEffortNone - Add IsValidThinkingLevelForModel() with gpt-5.4 detection - Add SuggestThinkingLevelFallback() for level migration - Auto-adjust thinking level on model switch with user notification - Update all docs to include 'none' in valid levels Fixes #11 --- README.md | 6 +-- cmd/root.go | 2 +- internal/models/providers.go | 63 ++++++++++++++++++++++++++++++-- internal/ui/commands/commands.go | 2 +- internal/ui/model.go | 53 ++++++++++++++++++++++++++- pkg/kit/kit.go | 25 +++++++++++-- skills/kit-sdk/SKILL.md | 4 +- www/pages/cli/commands.md | 2 +- www/pages/cli/flags.md | 2 +- www/pages/configuration.md | 2 +- www/pages/sdk/options.md | 4 +- www/pages/sdk/overview.md | 2 +- 12 files changed, 145 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 70d3c986..1152b4fc 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,7 @@ model: anthropic/claude-sonnet-latest max-tokens: 4096 temperature: 0.7 stream: true -thinking-level: off # off, minimal, low, medium, high +thinking-level: off # off, none, minimal, low, medium, high ``` All of the above keys can also be set programmatically via the SDK @@ -199,7 +199,7 @@ mcpServers: --stop-sequences Custom stop sequences (comma-separated) --frequency-penalty Penalize frequent tokens 0.0-2.0 (default: 0.0) --presence-penalty Penalize present tokens 0.0-2.0 (default: 0.0) ---thinking-level Extended thinking level: off, minimal, low, medium, high (default: off) +--thinking-level Extended thinking level: off, none, minimal, low, medium, high (default: 
off) # System --config Config file path (default: ~/.kit.yml) @@ -548,7 +548,7 @@ host, err := kit.New(ctx, &kit.Options{ // Generation parameters (override env/config/per-model defaults) MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → 8192 floor) - ThinkingLevel: "medium", // "off", "low", "medium", "high" + ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high" Temperature: ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default TopP: nil, // nil = leave provider/per-model default TopK: nil, diff --git a/cmd/root.go b/cmd/root.go index 250918cc..cbab5c69 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -304,7 +304,7 @@ func init() { flags.Float32Var(&frequencyPenalty, "frequency-penalty", 0.0, "penalizes tokens based on frequency of appearance (0.0-2.0)") flags.Float32Var(&presencePenalty, "presence-penalty", 0.0, "penalizes tokens based on whether they have appeared (0.0-2.0)") flags.StringSliceVar(&stopSequences, "stop-sequences", nil, "custom stop sequences (comma-separated)") - flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, minimal, low, medium, high") + flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, none, minimal, low, medium, high") // Ollama-specific parameters flags.Int32Var(&numGPU, "num-gpu-layers", -1, "number of model layers to offload to GPU for Ollama models (-1 for auto-detect)") diff --git a/internal/models/providers.go b/internal/models/providers.go index f296b48d..b4ba21fc 100644 --- a/internal/models/providers.go +++ b/internal/models/providers.go @@ -85,6 +85,7 @@ type ThinkingLevel string const ( ThinkingOff ThinkingLevel = "off" + ThinkingNone ThinkingLevel = "none" ThinkingMinimal ThinkingLevel = "minimal" ThinkingLow ThinkingLevel = "low" ThinkingMedium ThinkingLevel = "medium" @@ -93,12 +94,14 @@ const ( // ThinkingLevels returns the ordered list of available thinking levels for cycling. 
func ThinkingLevels() []ThinkingLevel { - return []ThinkingLevel{ThinkingOff, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh} + return []ThinkingLevel{ThinkingOff, ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh} } -// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off". +// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off"; "none" uses the same 1024-token budget as "minimal". func thinkingBudgetTokens(level ThinkingLevel) int64 { switch level { + case ThinkingNone: + return 1024 case ThinkingMinimal: return 1024 case ThinkingLow: @@ -117,6 +120,8 @@ func ThinkingLevelDescription(level ThinkingLevel) string { switch level { case ThinkingOff: return "No reasoning" + case ThinkingNone: + return "Minimal reasoning (OpenAI 'none')" case ThinkingMinimal: return "Very brief reasoning (~1k tokens)" case ThinkingLow: @@ -133,7 +138,7 @@ func ThinkingLevelDescription(level ThinkingLevel) string { // ParseThinkingLevel converts a string to a ThinkingLevel, defaulting to ThinkingOff. func ParseThinkingLevel(s string) ThinkingLevel { switch ThinkingLevel(s) { - case ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh: + case ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh: return ThinkingLevel(s) default: return ThinkingOff @@ -580,6 +585,8 @@ func buildOpenAIProviderOptions(config *ProviderConfig, modelName string) fantas // Returns nil for ThinkingOff (use the model's default). func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort { switch level { + case ThinkingNone: + return new(openai.ReasoningEffortNone) case ThinkingMinimal: return new(openai.ReasoningEffortMinimal) case ThinkingLow: @@ -593,6 +600,56 @@ func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort } } +// IsValidThinkingLevelForModel checks if a thinking level is valid for the given +// model. 
Some OpenAI models like gpt-5.4 don't support "minimal" and require +// "none" instead. +func IsValidThinkingLevelForModel(level ThinkingLevel, modelName string) bool { + if level == ThinkingOff { + return true + } + + // Check if this is an OpenAI model that doesn't support "minimal" + // gpt-5.4, gpt-5-pro and gpt-5-chat models use "none" instead of "minimal" + if level == ThinkingMinimal { + if strings.Contains(modelName, "gpt-5.4") || + strings.Contains(modelName, "gpt-5-pro") || + strings.Contains(modelName, "gpt-5-chat") { + return false + } + } + + // Check if this is an OpenAI model that doesn't support "none" + // Older gpt-5 models only support "minimal", not "none" + if level == ThinkingNone { + if strings.Contains(modelName, "gpt-5") && + !strings.Contains(modelName, "gpt-5.4") && + !strings.Contains(modelName, "gpt-5-pro") && + !strings.Contains(modelName, "gpt-5-chat") { + // Older gpt-5 models might not support "none" + // They only added "none" support in newer versions + return false + } + } + + // All other levels are generally valid for reasoning models + return true +} + +// SuggestThinkingLevelFallback returns a recommended fallback level when the +// requested level is not valid for the model. Returns ThinkingOff if the level +// is already valid for the model or no suitable fallback exists. +func SuggestThinkingLevelFallback(level ThinkingLevel, modelName string) ThinkingLevel { + if level == ThinkingMinimal && !IsValidThinkingLevelForModel(level, modelName) { + // For models that don't support "minimal", suggest "none" (~same token budget) + return ThinkingNone + } + if level == ThinkingNone && !IsValidThinkingLevelForModel(level, modelName) { + // For models that don't support "none", suggest "minimal" (~same token budget) + return ThinkingMinimal + } + return ThinkingOff +} + // buildAnthropicProviderOptions returns fantasy.ProviderOptions configured for // Anthropic models with extended thinking. When thinking is enabled, it sets // SendReasoning to true and configures the thinking budget. 
For thinking-off diff --git a/internal/ui/commands/commands.go b/internal/ui/commands/commands.go index 54da241d..a7ba8b40 100644 --- a/internal/ui/commands/commands.go +++ b/internal/ui/commands/commands.go @@ -84,7 +84,7 @@ var SlashCommands = []SlashCommand{ }, { Name: "/thinking", - Description: "Set thinking/reasoning level (off, minimal, low, medium, high)", + Description: "Set thinking/reasoning level (off, none, minimal, low, medium, high)", Category: "System", Aliases: []string{"/think"}, Complete: func(prefix string) []string { diff --git a/internal/ui/model.go b/internal/ui/model.go index 84d682d1..209bf5d9 100644 --- a/internal/ui/model.go +++ b/internal/ui/model.go @@ -1142,6 +1142,31 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) { m.state = stateInput if m.setModel != nil { previousModel := m.providerName + "/" + m.modelName + + // Check if thinking level needs adjustment for the new model. + // Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none". + if m.thinkingLevel != "" && m.thinkingLevel != "off" { + parts := strings.SplitN(msg.ModelString, "/", 2) + if len(parts) == 2 { + modelName := parts[1] + currentLevel := models.ParseThinkingLevel(m.thinkingLevel) + if !models.IsValidThinkingLevelForModel(currentLevel, modelName) { + fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName) + if fallback != models.ThinkingOff { + m.printSystemMessage(fmt.Sprintf( + "Note: Model %s doesn't support '%s' thinking level. 
Adjusted to '%s'.", + modelName, currentLevel, fallback, + )) + m.thinkingLevel = string(fallback) + if m.setThinkingLevel != nil { + _ = m.setThinkingLevel(string(fallback)) + } + go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }() + } + } + } + } + if err := m.setModel(msg.ModelString); err != nil { m.printSystemMessage(fmt.Sprintf("Failed to switch model: %v", err)) } else { @@ -2656,7 +2681,7 @@ func (m *AppModel) renderStatusBar() string { // cycleThinkingLevel advances to the next thinking level and applies it. func (m *AppModel) cycleThinkingLevel() { - levels := []string{"off", "minimal", "low", "medium", "high"} + levels := []string{"off", "none", "minimal", "low", "medium", "high"} current := m.thinkingLevel if current == "" { current = "off" @@ -3841,6 +3866,30 @@ func (m *AppModel) handleModelCommand(args string) tea.Cmd { return nil } + // Check if thinking level needs adjustment for the new model. + // Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none". + if m.thinkingLevel != "" && m.thinkingLevel != "off" { + parts := strings.SplitN(args, "/", 2) + if len(parts) == 2 { + modelName := parts[1] + currentLevel := models.ParseThinkingLevel(m.thinkingLevel) + if !models.IsValidThinkingLevelForModel(currentLevel, modelName) { + fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName) + if fallback != models.ThinkingOff { + m.printSystemMessage(fmt.Sprintf( + "Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.", + modelName, currentLevel, fallback, + )) + m.thinkingLevel = string(fallback) + if m.setThinkingLevel != nil { + _ = m.setThinkingLevel(string(fallback)) + } + go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }() + } + } + } + } + // Direct model switch with the provided model string. 
previousModel := m.providerName + "/" + m.modelName if err := m.setModel(args); err != nil { @@ -3945,7 +3994,7 @@ func (m *AppModel) handleThinkingCommand(args string) tea.Cmd { // Parse and validate the level. level := models.ParseThinkingLevel(args) if string(level) != strings.ToLower(args) { - m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, minimal, low, medium, high", args)) + m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, none, minimal, low, medium, high", args)) return nil } diff --git a/pkg/kit/kit.go b/pkg/kit/kit.go index 1794457a..f2e19ce6 100644 --- a/pkg/kit/kit.go +++ b/pkg/kit/kit.go @@ -543,6 +543,23 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error { systemPrompt, _ := config.LoadSystemPrompt(viper.GetString("system-prompt")) thinkingLevel := models.ParseThinkingLevel(viper.GetString("thinking-level")) + // Validate and adjust thinking level for the target model. + // Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none". + if thinkingLevel != models.ThinkingOff { + parts := strings.SplitN(modelString, "/", 2) + if len(parts) == 2 { + modelName := parts[1] + if !models.IsValidThinkingLevelForModel(thinkingLevel, modelName) { + fallback := models.SuggestThinkingLevelFallback(thinkingLevel, modelName) + if fallback != models.ThinkingOff { + // Adjust the thinking level in viper so the change persists. + viper.Set("thinking-level", string(fallback)) + thinkingLevel = fallback + } + } + } + } + // With message-level caching, thinking and caching can work together. // No need to disable caching when thinking is enabled. cfg := &models.ProviderConfig{ @@ -866,10 +883,10 @@ type Options struct { MaxTokens int // ThinkingLevel sets the reasoning effort for models that support - // extended thinking. Valid values: "off", "low", "medium", "high". - // "" = let the precedence chain resolve a level (env → config → - // per-model → "off"). 
Use [Kit.SetThinkingLevel] to change at - // runtime. + // extended thinking. Valid values: "off", "none", "minimal", "low", + // "medium", "high". "" = let the precedence chain resolve a level + // (env → config → per-model → "off"). Use [Kit.SetThinkingLevel] + // to change at runtime. ThinkingLevel string // Temperature controls sampling randomness (typically 0.0–2.0). diff --git a/skills/kit-sdk/SKILL.md b/skills/kit-sdk/SKILL.md index 84f40128..059ac3fa 100644 --- a/skills/kit-sdk/SKILL.md +++ b/skills/kit-sdk/SKILL.md @@ -85,7 +85,7 @@ host, err := kit.New(ctx, &kit.Options{ // resolve a value (KIT_* env → .kit.yml → modelSettings/customModels → // 8192 floor for MaxTokens, provider defaults for samplers). MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing - ThinkingLevel: "medium", // "off", "low", "medium", "high" ("" = default) + ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high" ("" = default) Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset TopP: nil, // nil = leave provider/per-model default TopK: nil, // nil = leave provider/per-model default @@ -154,7 +154,7 @@ func ptrFloat32(v float32) *float32 { return &v } | Field | Type | Empty/nil means | Notes | |-------|------|-----------------|-------| | `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` | -| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) | +| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"` | | `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset | | `TopP` | `*float32` | Leave provider/per-model default | | | `TopK` | `*int32` | Leave provider/per-model default | | diff --git a/www/pages/cli/commands.md b/www/pages/cli/commands.md index 
769f6bd4..9e44b1c0 100644 --- a/www/pages/cli/commands.md +++ b/www/pages/cli/commands.md @@ -66,7 +66,7 @@ These commands are available inside the Kit TUI during an interactive session: | `/servers` | Show connected MCP servers | | `/model [name]` | Switch model or open model selector | | `/theme [name]` | Switch color theme or list available themes | -| `/thinking [level]` | Set thinking level (off, minimal, low, medium, high) | +| `/thinking [level]` | Set thinking level (off, none, minimal, low, medium, high) | | `/compact [focus]` | Summarize older messages to free context | | `/clear` | Clear conversation | | `/clear-queue` | Clear queued messages | diff --git a/www/pages/cli/flags.md b/www/pages/cli/flags.md index 52be0b5f..f85daa64 100644 --- a/www/pages/cli/flags.md +++ b/www/pages/cli/flags.md @@ -59,7 +59,7 @@ These flags control Kit's behavior. When a prompt is passed as a positional argu | `--stop-sequences` | — | — | Custom stop sequences (comma-separated) | | `--frequency-penalty` | — | `0.0` | Penalize frequent tokens (0.0–2.0) | | `--presence-penalty` | — | `0.0` | Penalize present tokens (0.0–2.0) | -| `--thinking-level` | — | `off` | Extended thinking level: off, minimal, low, medium, high | +| `--thinking-level` | — | `off` | Extended thinking level: off, none, minimal, low, medium, high | ## System diff --git a/www/pages/configuration.md b/www/pages/configuration.md index 2176f07d..0976436a 100644 --- a/www/pages/configuration.md +++ b/www/pages/configuration.md @@ -37,7 +37,7 @@ stream: true | `compact` | bool | `false` | Enable compact output mode | | `system-prompt` | string | — | System prompt text or file path | | `max-steps` | int | `0` | Maximum agent steps (0 = unlimited) | -| `thinking-level` | string | `off` | Extended thinking: off, minimal, low, medium, high | +| `thinking-level` | string | `off` | Extended thinking: off, none, minimal, low, medium, high | | `provider-api-key` | string | — | API key for the provider | | 
`provider-url` | string | — | Base URL for provider API | | `tls-skip-verify` | bool | `false` | Skip TLS certificate verification | diff --git a/www/pages/sdk/options.md b/www/pages/sdk/options.md index 1fd74564..be9d643c 100644 --- a/www/pages/sdk/options.md +++ b/www/pages/sdk/options.md @@ -24,7 +24,7 @@ host, err := kit.New(ctx, &kit.Options{ // Generation parameters (override env/config/per-model defaults) MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing - ThinkingLevel: "medium", // "off", "low", "medium", "high" + ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high" Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset TopP: nil, // nil = provider/per-model default TopK: nil, @@ -107,7 +107,7 @@ defaults for samplers). | Field | Type | Default | Description | |-------|------|---------|-------------| | `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). | -| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to config/env/per-model/`"off"`. | +| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`. `""` falls through to config/env/per-model/`"off"`. | | `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". | | `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. | | `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. 
| diff --git a/www/pages/sdk/overview.md b/www/pages/sdk/overview.md index b16c63d0..942d85fb 100644 --- a/www/pages/sdk/overview.md +++ b/www/pages/sdk/overview.md @@ -115,7 +115,7 @@ entirely in-code via `Options`, without touching `.kit.yml` or `viper.Set()`: host, _ := kit.New(ctx, &kit.Options{ Model: "anthropic/claude-sonnet-4-5-20250929", MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → floor) - ThinkingLevel: "high", // "off" | "low" | "medium" | "high" + ThinkingLevel: "high", // "off" | "none" | "minimal" | "low" | "medium" | "high" Temperature: ptrFloat32(0.2), // nil = provider/per-model default ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state ProviderURL: "https://proxy.internal/v1",