fix(models): add gpt-5.4 reasoning level support with auto-adjustment

Adds 'none' thinking level to support OpenAI gpt-5.4 models which use
'reasoning_effort: none' instead of 'minimal'. Includes validation and
auto-adjustment when switching models with incompatible levels.

- Add ThinkingNone constant mapping to ReasoningEffortNone
- Add IsValidThinkingLevelForModel() with gpt-5.4 detection
- Add SuggestThinkingLevelFallback() for level migration
- Auto-adjust thinking level on model switch with user notification
- Update all docs to include 'none' in valid levels

Fixes #11
This commit is contained in:
Ed Zynda
2026-04-21 20:19:00 +03:00
parent c1dee3ceba
commit 3ff701054a
12 changed files with 145 additions and 22 deletions
+3 -3
View File
@@ -126,7 +126,7 @@ model: anthropic/claude-sonnet-latest
max-tokens: 4096
temperature: 0.7
stream: true
thinking-level: off # off, minimal, low, medium, high
thinking-level: off # off, none, minimal, low, medium, high
```
All of the above keys can also be set programmatically via the SDK
@@ -199,7 +199,7 @@ mcpServers:
--stop-sequences Custom stop sequences (comma-separated)
--frequency-penalty Penalize frequent tokens 0.0-2.0 (default: 0.0)
--presence-penalty Penalize present tokens 0.0-2.0 (default: 0.0)
--thinking-level Extended thinking level: off, minimal, low, medium, high (default: off)
--thinking-level Extended thinking level: off, none, minimal, low, medium, high (default: off)
# System
--config Config file path (default: ~/.kit.yml)
@@ -548,7 +548,7 @@ host, err := kit.New(ctx, &kit.Options{
// Generation parameters (override env/config/per-model defaults)
MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → 8192 floor)
ThinkingLevel: "medium", // "off", "low", "medium", "high"
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high"
Temperature: ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
TopP: nil, // nil = leave provider/per-model default
TopK: nil,
+1 -1
View File
@@ -304,7 +304,7 @@ func init() {
flags.Float32Var(&frequencyPenalty, "frequency-penalty", 0.0, "penalizes tokens based on frequency of appearance (0.0-2.0)")
flags.Float32Var(&presencePenalty, "presence-penalty", 0.0, "penalizes tokens based on whether they have appeared (0.0-2.0)")
flags.StringSliceVar(&stopSequences, "stop-sequences", nil, "custom stop sequences (comma-separated)")
flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, minimal, low, medium, high")
flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, none, minimal, low, medium, high")
// Ollama-specific parameters
flags.Int32Var(&numGPU, "num-gpu-layers", -1, "number of model layers to offload to GPU for Ollama models (-1 for auto-detect)")
+60 -3
View File
@@ -85,6 +85,7 @@ type ThinkingLevel string
const (
ThinkingOff ThinkingLevel = "off"
ThinkingNone ThinkingLevel = "none"
ThinkingMinimal ThinkingLevel = "minimal"
ThinkingLow ThinkingLevel = "low"
ThinkingMedium ThinkingLevel = "medium"
@@ -93,12 +94,14 @@ const (
// ThinkingLevels returns the ordered list of available thinking levels for cycling.
func ThinkingLevels() []ThinkingLevel {
return []ThinkingLevel{ThinkingOff, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
return []ThinkingLevel{ThinkingOff, ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
}
// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off".
// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off" or "none".
func thinkingBudgetTokens(level ThinkingLevel) int64 {
switch level {
case ThinkingNone:
return 1024
case ThinkingMinimal:
return 1024
case ThinkingLow:
@@ -117,6 +120,8 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
switch level {
case ThinkingOff:
return "No reasoning"
case ThinkingNone:
return "Minimal reasoning (OpenAI 'none')"
case ThinkingMinimal:
return "Very brief reasoning (~1k tokens)"
case ThinkingLow:
@@ -133,7 +138,7 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
// ParseThinkingLevel converts a string to a ThinkingLevel, defaulting to ThinkingOff.
func ParseThinkingLevel(s string) ThinkingLevel {
switch ThinkingLevel(s) {
case ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
case ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
return ThinkingLevel(s)
default:
return ThinkingOff
@@ -580,6 +585,8 @@ func buildOpenAIProviderOptions(config *ProviderConfig, modelName string) fantas
// Returns nil for ThinkingOff (use the model's default).
func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort {
switch level {
case ThinkingNone:
return new(openai.ReasoningEffortNone)
case ThinkingMinimal:
return new(openai.ReasoningEffortMinimal)
case ThinkingLow:
@@ -593,6 +600,56 @@ func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort
}
}
// IsValidThinkingLevelForModel checks if a thinking level is valid for the given
// model. Some OpenAI models like gpt-5.4 don't support "minimal" and require
// "none" instead.
func IsValidThinkingLevelForModel(level ThinkingLevel, modelName string) bool {
if level == ThinkingOff {
return true
}
// Check if this is an OpenAI model that doesn't support "minimal"
// gpt-5.4 and newer gpt-5.x models use "none" instead of "minimal"
if level == ThinkingMinimal {
if strings.Contains(modelName, "gpt-5.4") ||
strings.Contains(modelName, "gpt-5-pro") ||
strings.Contains(modelName, "gpt-5-chat") {
return false
}
}
// Check if this is an OpenAI model that doesn't support "none"
// Older gpt-5 models only support "minimal", not "none"
if level == ThinkingNone {
if strings.Contains(modelName, "gpt-5") &&
!strings.Contains(modelName, "gpt-5.4") &&
!strings.Contains(modelName, "gpt-5-pro") &&
!strings.Contains(modelName, "gpt-5-chat") {
// Older gpt-5 models might not support "none"
// They only added "none" support in newer versions
return false
}
}
// All other levels are generally valid for reasoning models
return true
}
// SuggestThinkingLevelFallback returns a recommended fallback level when the
// requested level is not valid for the model. Returns ThinkingOff if no
// suitable fallback exists.
func SuggestThinkingLevelFallback(level ThinkingLevel, modelName string) ThinkingLevel {
if level == ThinkingMinimal && !IsValidThinkingLevelForModel(level, modelName) {
// For models that don't support "minimal", suggest "none" (~same token budget)
return ThinkingNone
}
if level == ThinkingNone && !IsValidThinkingLevelForModel(level, modelName) {
// For models that don't support "none", suggest "minimal" (~same token budget)
return ThinkingMinimal
}
return ThinkingOff
}
// buildAnthropicProviderOptions returns fantasy.ProviderOptions configured for
// Anthropic models with extended thinking. When thinking is enabled, it sets
// SendReasoning to true and configures the thinking budget. For thinking-off
+1 -1
View File
@@ -84,7 +84,7 @@ var SlashCommands = []SlashCommand{
},
{
Name: "/thinking",
Description: "Set thinking/reasoning level (off, minimal, low, medium, high)",
Description: "Set thinking/reasoning level (off, none, minimal, low, medium, high)",
Category: "System",
Aliases: []string{"/think"},
Complete: func(prefix string) []string {
+51 -2
View File
@@ -1142,6 +1142,31 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
m.state = stateInput
if m.setModel != nil {
previousModel := m.providerName + "/" + m.modelName
// Check if thinking level needs adjustment for the new model.
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
if m.thinkingLevel != "" && m.thinkingLevel != "off" {
parts := strings.SplitN(msg.ModelString, "/", 2)
if len(parts) == 2 {
modelName := parts[1]
currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
if fallback != models.ThinkingOff {
m.printSystemMessage(fmt.Sprintf(
"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
modelName, currentLevel, fallback,
))
m.thinkingLevel = string(fallback)
if m.setThinkingLevel != nil {
_ = m.setThinkingLevel(string(fallback))
}
go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
}
}
}
}
if err := m.setModel(msg.ModelString); err != nil {
m.printSystemMessage(fmt.Sprintf("Failed to switch model: %v", err))
} else {
@@ -2656,7 +2681,7 @@ func (m *AppModel) renderStatusBar() string {
// cycleThinkingLevel advances to the next thinking level and applies it.
func (m *AppModel) cycleThinkingLevel() {
levels := []string{"off", "minimal", "low", "medium", "high"}
levels := []string{"off", "none", "minimal", "low", "medium", "high"}
current := m.thinkingLevel
if current == "" {
current = "off"
@@ -3841,6 +3866,30 @@ func (m *AppModel) handleModelCommand(args string) tea.Cmd {
return nil
}
// Check if thinking level needs adjustment for the new model.
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
if m.thinkingLevel != "" && m.thinkingLevel != "off" {
parts := strings.SplitN(args, "/", 2)
if len(parts) == 2 {
modelName := parts[1]
currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
if fallback != models.ThinkingOff {
m.printSystemMessage(fmt.Sprintf(
"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
modelName, currentLevel, fallback,
))
m.thinkingLevel = string(fallback)
if m.setThinkingLevel != nil {
_ = m.setThinkingLevel(string(fallback))
}
go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
}
}
}
}
// Direct model switch with the provided model string.
previousModel := m.providerName + "/" + m.modelName
if err := m.setModel(args); err != nil {
@@ -3945,7 +3994,7 @@ func (m *AppModel) handleThinkingCommand(args string) tea.Cmd {
// Parse and validate the level.
level := models.ParseThinkingLevel(args)
if string(level) != strings.ToLower(args) {
m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, minimal, low, medium, high", args))
m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, none, minimal, low, medium, high", args))
return nil
}
+21 -4
View File
@@ -543,6 +543,23 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error {
systemPrompt, _ := config.LoadSystemPrompt(viper.GetString("system-prompt"))
thinkingLevel := models.ParseThinkingLevel(viper.GetString("thinking-level"))
// Validate and adjust thinking level for the target model.
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
if thinkingLevel != models.ThinkingOff {
parts := strings.SplitN(modelString, "/", 2)
if len(parts) == 2 {
modelName := parts[1]
if !models.IsValidThinkingLevelForModel(thinkingLevel, modelName) {
fallback := models.SuggestThinkingLevelFallback(thinkingLevel, modelName)
if fallback != models.ThinkingOff {
// Adjust the thinking level in viper so the change persists.
viper.Set("thinking-level", string(fallback))
thinkingLevel = fallback
}
}
}
}
// With message-level caching, thinking and caching can work together.
// No need to disable caching when thinking is enabled.
cfg := &models.ProviderConfig{
@@ -866,10 +883,10 @@ type Options struct {
MaxTokens int
// ThinkingLevel sets the reasoning effort for models that support
// extended thinking. Valid values: "off", "low", "medium", "high".
// "" = let the precedence chain resolve a level (env → config →
// per-model → "off"). Use [Kit.SetThinkingLevel] to change at
// runtime.
// extended thinking. Valid values: "off", "none", "minimal", "low",
// "medium", "high". "" = let the precedence chain resolve a level
// (env → config → per-model → "off"). Use [Kit.SetThinkingLevel]
// to change at runtime.
ThinkingLevel string
// Temperature controls sampling randomness (typically 0.02.0).
+2 -2
View File
@@ -85,7 +85,7 @@ host, err := kit.New(ctx, &kit.Options{
// resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
// 8192 floor for MaxTokens, provider defaults for samplers).
MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing
ThinkingLevel: "medium", // "off", "low", "medium", "high" ("" = default)
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high" ("" = default)
Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset
TopP: nil, // nil = leave provider/per-model default
TopK: nil, // nil = leave provider/per-model default
@@ -154,7 +154,7 @@ func ptrFloat32(v float32) *float32 { return &v }
| Field | Type | Empty/nil means | Notes |
|-------|------|-----------------|-------|
| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"` |
| `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
| `TopP` | `*float32` | Leave provider/per-model default | |
| `TopK` | `*int32` | Leave provider/per-model default | |
+1 -1
View File
@@ -66,7 +66,7 @@ These commands are available inside the Kit TUI during an interactive session:
| `/servers` | Show connected MCP servers |
| `/model [name]` | Switch model or open model selector |
| `/theme [name]` | Switch color theme or list available themes |
| `/thinking [level]` | Set thinking level (off, minimal, low, medium, high) |
| `/thinking [level]` | Set thinking level (off, none, minimal, low, medium, high) |
| `/compact [focus]` | Summarize older messages to free context |
| `/clear` | Clear conversation |
| `/clear-queue` | Clear queued messages |
+1 -1
View File
@@ -59,7 +59,7 @@ These flags control Kit's behavior. When a prompt is passed as a positional argu
| `--stop-sequences` | — | — | Custom stop sequences (comma-separated) |
| `--frequency-penalty` | — | `0.0` | Penalize frequent tokens (0.02.0) |
| `--presence-penalty` | — | `0.0` | Penalize present tokens (0.02.0) |
| `--thinking-level` | — | `off` | Extended thinking level: off, minimal, low, medium, high |
| `--thinking-level` | — | `off` | Extended thinking level: off, none, minimal, low, medium, high |
## System
+1 -1
View File
@@ -37,7 +37,7 @@ stream: true
| `compact` | bool | `false` | Enable compact output mode |
| `system-prompt` | string | — | System prompt text or file path |
| `max-steps` | int | `0` | Maximum agent steps (0 = unlimited) |
| `thinking-level` | string | `off` | Extended thinking: off, minimal, low, medium, high |
| `thinking-level` | string | `off` | Extended thinking: off, none, minimal, low, medium, high |
| `provider-api-key` | string | — | API key for the provider |
| `provider-url` | string | — | Base URL for provider API |
| `tls-skip-verify` | bool | `false` | Skip TLS certificate verification |
+2 -2
View File
@@ -24,7 +24,7 @@ host, err := kit.New(ctx, &kit.Options{
// Generation parameters (override env/config/per-model defaults)
MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing
ThinkingLevel: "medium", // "off", "low", "medium", "high"
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high"
Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset
TopP: nil, // nil = provider/per-model default
TopK: nil,
@@ -107,7 +107,7 @@ defaults for samplers).
| Field | Type | Default | Description |
|-------|------|---------|-------------|
| `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). |
| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to config/env/per-model/`"off"`. |
| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`. `""` falls through to config/env/per-model/`"off"`. |
| `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". |
| `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. |
| `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. |
+1 -1
View File
@@ -115,7 +115,7 @@ entirely in-code via `Options`, without touching `.kit.yml` or `viper.Set()`:
host, _ := kit.New(ctx, &kit.Options{
Model: "anthropic/claude-sonnet-4-5-20250929",
MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → floor)
ThinkingLevel: "high", // "off" | "low" | "medium" | "high"
ThinkingLevel: "high", // "off" | "none" | "minimal" | "low" | "medium" | "high"
Temperature: ptrFloat32(0.2), // nil = provider/per-model default
ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state
ProviderURL: "https://proxy.internal/v1",