mirror of
https://github.com/mark3labs/kit.git
synced 2026-06-13 19:20:06 +00:00
fix(models): add gpt-5.4 reasoning level support with auto-adjustment
Adds 'none' thinking level to support OpenAI gpt-5.4 models which use 'reasoning_effort: none' instead of 'minimal'. Includes validation and auto-adjustment when switching models with incompatible levels. - Add ThinkingNone constant mapping to ReasoningEffortNone - Add IsValidThinkingLevelForModel() with gpt-5.4 detection - Add SuggestThinkingLevelFallback() for level migration - Auto-adjust thinking level on model switch with user notification - Update all docs to include 'none' in valid levels Fixes #11
This commit is contained in:
@@ -126,7 +126,7 @@ model: anthropic/claude-sonnet-latest
|
||||
max-tokens: 4096
|
||||
temperature: 0.7
|
||||
stream: true
|
||||
thinking-level: off # off, minimal, low, medium, high
|
||||
thinking-level: off # off, none, minimal, low, medium, high
|
||||
```
|
||||
|
||||
All of the above keys can also be set programmatically via the SDK
|
||||
@@ -199,7 +199,7 @@ mcpServers:
|
||||
--stop-sequences Custom stop sequences (comma-separated)
|
||||
--frequency-penalty Penalize frequent tokens 0.0-2.0 (default: 0.0)
|
||||
--presence-penalty Penalize present tokens 0.0-2.0 (default: 0.0)
|
||||
--thinking-level Extended thinking level: off, minimal, low, medium, high (default: off)
|
||||
--thinking-level Extended thinking level: off, none, minimal, low, medium, high (default: off)
|
||||
|
||||
# System
|
||||
--config Config file path (default: ~/.kit.yml)
|
||||
@@ -548,7 +548,7 @@ host, err := kit.New(ctx, &kit.Options{
|
||||
|
||||
// Generation parameters (override env/config/per-model defaults)
|
||||
MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → 8192 floor)
|
||||
ThinkingLevel: "medium", // "off", "low", "medium", "high"
|
||||
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high"
|
||||
Temperature: ptr(float32(0.2)), // pointer so 0.0 != unset; nil = provider default
|
||||
TopP: nil, // nil = leave provider/per-model default
|
||||
TopK: nil,
|
||||
|
||||
+1
-1
@@ -304,7 +304,7 @@ func init() {
|
||||
flags.Float32Var(&frequencyPenalty, "frequency-penalty", 0.0, "penalizes tokens based on frequency of appearance (0.0-2.0)")
|
||||
flags.Float32Var(&presencePenalty, "presence-penalty", 0.0, "penalizes tokens based on whether they have appeared (0.0-2.0)")
|
||||
flags.StringSliceVar(&stopSequences, "stop-sequences", nil, "custom stop sequences (comma-separated)")
|
||||
flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, minimal, low, medium, high")
|
||||
flags.StringVar(&thinkingLevel, "thinking-level", "off", "extended thinking level: off, none, minimal, low, medium, high")
|
||||
|
||||
// Ollama-specific parameters
|
||||
flags.Int32Var(&numGPU, "num-gpu-layers", -1, "number of model layers to offload to GPU for Ollama models (-1 for auto-detect)")
|
||||
|
||||
@@ -85,6 +85,7 @@ type ThinkingLevel string
|
||||
|
||||
const (
|
||||
ThinkingOff ThinkingLevel = "off"
|
||||
ThinkingNone ThinkingLevel = "none"
|
||||
ThinkingMinimal ThinkingLevel = "minimal"
|
||||
ThinkingLow ThinkingLevel = "low"
|
||||
ThinkingMedium ThinkingLevel = "medium"
|
||||
@@ -93,12 +94,14 @@ const (
|
||||
|
||||
// ThinkingLevels returns the ordered list of available thinking levels for cycling.
|
||||
func ThinkingLevels() []ThinkingLevel {
|
||||
return []ThinkingLevel{ThinkingOff, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
|
||||
return []ThinkingLevel{ThinkingOff, ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh}
|
||||
}
|
||||
|
||||
// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off".
|
||||
// thinkingBudgetTokens returns the token budget for a thinking level, or 0 for "off". "none" shares the 1024-token budget of "minimal".
|
||||
func thinkingBudgetTokens(level ThinkingLevel) int64 {
|
||||
switch level {
|
||||
case ThinkingNone:
|
||||
return 1024
|
||||
case ThinkingMinimal:
|
||||
return 1024
|
||||
case ThinkingLow:
|
||||
@@ -117,6 +120,8 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
|
||||
switch level {
|
||||
case ThinkingOff:
|
||||
return "No reasoning"
|
||||
case ThinkingNone:
|
||||
return "Minimal reasoning (OpenAI 'none')"
|
||||
case ThinkingMinimal:
|
||||
return "Very brief reasoning (~1k tokens)"
|
||||
case ThinkingLow:
|
||||
@@ -133,7 +138,7 @@ func ThinkingLevelDescription(level ThinkingLevel) string {
|
||||
// ParseThinkingLevel converts a string to a ThinkingLevel, defaulting to ThinkingOff.
|
||||
func ParseThinkingLevel(s string) ThinkingLevel {
|
||||
switch ThinkingLevel(s) {
|
||||
case ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
|
||||
case ThinkingNone, ThinkingMinimal, ThinkingLow, ThinkingMedium, ThinkingHigh:
|
||||
return ThinkingLevel(s)
|
||||
default:
|
||||
return ThinkingOff
|
||||
@@ -580,6 +585,8 @@ func buildOpenAIProviderOptions(config *ProviderConfig, modelName string) fantas
|
||||
// Returns nil for ThinkingOff (use the model's default).
|
||||
func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort {
|
||||
switch level {
|
||||
case ThinkingNone:
|
||||
return new(openai.ReasoningEffortNone)
|
||||
case ThinkingMinimal:
|
||||
return new(openai.ReasoningEffortMinimal)
|
||||
case ThinkingLow:
|
||||
@@ -593,6 +600,56 @@ func thinkingLevelToReasoningEffort(level ThinkingLevel) *openai.ReasoningEffort
|
||||
}
|
||||
}
|
||||
|
||||
// IsValidThinkingLevelForModel checks if a thinking level is valid for the given
|
||||
// model. Some OpenAI models like gpt-5.4 don't support "minimal" and require
|
||||
// "none" instead.
|
||||
func IsValidThinkingLevelForModel(level ThinkingLevel, modelName string) bool {
|
||||
if level == ThinkingOff {
|
||||
return true
|
||||
}
|
||||
|
||||
// Check if this is an OpenAI model that doesn't support "minimal"
|
||||
// gpt-5.4 and newer gpt-5.x models use "none" instead of "minimal"
|
||||
if level == ThinkingMinimal {
|
||||
if strings.Contains(modelName, "gpt-5.4") ||
|
||||
strings.Contains(modelName, "gpt-5-pro") ||
|
||||
strings.Contains(modelName, "gpt-5-chat") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Check if this is an OpenAI model that doesn't support "none"
|
||||
// Older gpt-5 models only support "minimal", not "none"
|
||||
if level == ThinkingNone {
|
||||
if strings.Contains(modelName, "gpt-5") &&
|
||||
!strings.Contains(modelName, "gpt-5.4") &&
|
||||
!strings.Contains(modelName, "gpt-5-pro") &&
|
||||
!strings.Contains(modelName, "gpt-5-chat") {
|
||||
// Older gpt-5 models might not support "none"
|
||||
// They only added "none" support in newer versions
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// All other levels are generally valid for reasoning models
|
||||
return true
|
||||
}
|
||||
|
||||
// SuggestThinkingLevelFallback returns a recommended fallback level when the
|
||||
// requested level is not valid for the model. Returns ThinkingOff if no
|
||||
// suitable fallback exists.
|
||||
func SuggestThinkingLevelFallback(level ThinkingLevel, modelName string) ThinkingLevel {
|
||||
if level == ThinkingMinimal && !IsValidThinkingLevelForModel(level, modelName) {
|
||||
// For models that don't support "minimal", suggest "none" (~same token budget)
|
||||
return ThinkingNone
|
||||
}
|
||||
if level == ThinkingNone && !IsValidThinkingLevelForModel(level, modelName) {
|
||||
// For models that don't support "none", suggest "minimal" (~same token budget)
|
||||
return ThinkingMinimal
|
||||
}
|
||||
return ThinkingOff
|
||||
}
|
||||
|
||||
// buildAnthropicProviderOptions returns fantasy.ProviderOptions configured for
|
||||
// Anthropic models with extended thinking. When thinking is enabled, it sets
|
||||
// SendReasoning to true and configures the thinking budget. For thinking-off
|
||||
|
||||
@@ -84,7 +84,7 @@ var SlashCommands = []SlashCommand{
|
||||
},
|
||||
{
|
||||
Name: "/thinking",
|
||||
Description: "Set thinking/reasoning level (off, minimal, low, medium, high)",
|
||||
Description: "Set thinking/reasoning level (off, none, minimal, low, medium, high)",
|
||||
Category: "System",
|
||||
Aliases: []string{"/think"},
|
||||
Complete: func(prefix string) []string {
|
||||
|
||||
+51
-2
@@ -1142,6 +1142,31 @@ func (m *AppModel) Update(msg tea.Msg) (tea.Model, tea.Cmd) {
|
||||
m.state = stateInput
|
||||
if m.setModel != nil {
|
||||
previousModel := m.providerName + "/" + m.modelName
|
||||
|
||||
// Check if thinking level needs adjustment for the new model.
|
||||
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
|
||||
if m.thinkingLevel != "" && m.thinkingLevel != "off" {
|
||||
parts := strings.SplitN(msg.ModelString, "/", 2)
|
||||
if len(parts) == 2 {
|
||||
modelName := parts[1]
|
||||
currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
|
||||
if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
|
||||
fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
|
||||
if fallback != models.ThinkingOff {
|
||||
m.printSystemMessage(fmt.Sprintf(
|
||||
"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
|
||||
modelName, currentLevel, fallback,
|
||||
))
|
||||
m.thinkingLevel = string(fallback)
|
||||
if m.setThinkingLevel != nil {
|
||||
_ = m.setThinkingLevel(string(fallback))
|
||||
}
|
||||
go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if err := m.setModel(msg.ModelString); err != nil {
|
||||
m.printSystemMessage(fmt.Sprintf("Failed to switch model: %v", err))
|
||||
} else {
|
||||
@@ -2656,7 +2681,7 @@ func (m *AppModel) renderStatusBar() string {
|
||||
|
||||
// cycleThinkingLevel advances to the next thinking level and applies it.
|
||||
func (m *AppModel) cycleThinkingLevel() {
|
||||
levels := []string{"off", "minimal", "low", "medium", "high"}
|
||||
levels := []string{"off", "none", "minimal", "low", "medium", "high"}
|
||||
current := m.thinkingLevel
|
||||
if current == "" {
|
||||
current = "off"
|
||||
@@ -3841,6 +3866,30 @@ func (m *AppModel) handleModelCommand(args string) tea.Cmd {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if thinking level needs adjustment for the new model.
|
||||
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
|
||||
if m.thinkingLevel != "" && m.thinkingLevel != "off" {
|
||||
parts := strings.SplitN(args, "/", 2)
|
||||
if len(parts) == 2 {
|
||||
modelName := parts[1]
|
||||
currentLevel := models.ParseThinkingLevel(m.thinkingLevel)
|
||||
if !models.IsValidThinkingLevelForModel(currentLevel, modelName) {
|
||||
fallback := models.SuggestThinkingLevelFallback(currentLevel, modelName)
|
||||
if fallback != models.ThinkingOff {
|
||||
m.printSystemMessage(fmt.Sprintf(
|
||||
"Note: Model %s doesn't support '%s' thinking level. Adjusted to '%s'.",
|
||||
modelName, currentLevel, fallback,
|
||||
))
|
||||
m.thinkingLevel = string(fallback)
|
||||
if m.setThinkingLevel != nil {
|
||||
_ = m.setThinkingLevel(string(fallback))
|
||||
}
|
||||
go func() { _ = prefs.SaveThinkingLevelPreference(string(fallback)) }()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Direct model switch with the provided model string.
|
||||
previousModel := m.providerName + "/" + m.modelName
|
||||
if err := m.setModel(args); err != nil {
|
||||
@@ -3945,7 +3994,7 @@ func (m *AppModel) handleThinkingCommand(args string) tea.Cmd {
|
||||
// Parse and validate the level.
|
||||
level := models.ParseThinkingLevel(args)
|
||||
if string(level) != strings.ToLower(args) {
|
||||
m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, minimal, low, medium, high", args))
|
||||
m.printSystemMessage(fmt.Sprintf("Unknown thinking level: %q. Use: off, none, minimal, low, medium, high", args))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
+21
-4
@@ -543,6 +543,23 @@ func (m *Kit) SetModel(ctx context.Context, modelString string) error {
|
||||
systemPrompt, _ := config.LoadSystemPrompt(viper.GetString("system-prompt"))
|
||||
thinkingLevel := models.ParseThinkingLevel(viper.GetString("thinking-level"))
|
||||
|
||||
// Validate and adjust thinking level for the target model.
|
||||
// Some models (e.g., OpenAI gpt-5.4) don't support "minimal" and require "none".
|
||||
if thinkingLevel != models.ThinkingOff {
|
||||
parts := strings.SplitN(modelString, "/", 2)
|
||||
if len(parts) == 2 {
|
||||
modelName := parts[1]
|
||||
if !models.IsValidThinkingLevelForModel(thinkingLevel, modelName) {
|
||||
fallback := models.SuggestThinkingLevelFallback(thinkingLevel, modelName)
|
||||
if fallback != models.ThinkingOff {
|
||||
// Adjust the thinking level in viper so the change persists.
|
||||
viper.Set("thinking-level", string(fallback))
|
||||
thinkingLevel = fallback
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// With message-level caching, thinking and caching can work together.
|
||||
// No need to disable caching when thinking is enabled.
|
||||
cfg := &models.ProviderConfig{
|
||||
@@ -866,10 +883,10 @@ type Options struct {
|
||||
MaxTokens int
|
||||
|
||||
// ThinkingLevel sets the reasoning effort for models that support
|
||||
// extended thinking. Valid values: "off", "low", "medium", "high".
|
||||
// "" = let the precedence chain resolve a level (env → config →
|
||||
// per-model → "off"). Use [Kit.SetThinkingLevel] to change at
|
||||
// runtime.
|
||||
// extended thinking. Valid values: "off", "none", "minimal", "low",
|
||||
// "medium", "high". "" = let the precedence chain resolve a level
|
||||
// (env → config → per-model → "off"). Use [Kit.SetThinkingLevel]
|
||||
// to change at runtime.
|
||||
ThinkingLevel string
|
||||
|
||||
// Temperature controls sampling randomness (typically 0.0–2.0).
|
||||
|
||||
@@ -85,7 +85,7 @@ host, err := kit.New(ctx, &kit.Options{
|
||||
// resolve a value (KIT_* env → .kit.yml → modelSettings/customModels →
|
||||
// 8192 floor for MaxTokens, provider defaults for samplers).
|
||||
MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing
|
||||
ThinkingLevel: "medium", // "off", "low", "medium", "high" ("" = default)
|
||||
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high" ("" = default)
|
||||
Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset
|
||||
TopP: nil, // nil = leave provider/per-model default
|
||||
TopK: nil, // nil = leave provider/per-model default
|
||||
@@ -154,7 +154,7 @@ func ptrFloat32(v float32) *float32 { return &v }
|
||||
| Field | Type | Empty/nil means | Notes |
|
||||
|-------|------|-----------------|-------|
|
||||
| `MaxTokens` | `int` | Auto-resolve (env → config → per-model → 8192 floor) | Non-zero suppresses `rightSizeMaxTokens` |
|
||||
| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"low"`, `"medium"`, `"high"` (and `"minimal"` for some providers) |
|
||||
| `ThinkingLevel` | `string` | Auto-resolve (→ `"off"`) | Valid: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"` |
|
||||
| `Temperature` | `*float32` | Leave provider/per-model default | Pointer so explicit `0.0` ≠ unset |
|
||||
| `TopP` | `*float32` | Leave provider/per-model default | |
|
||||
| `TopK` | `*int32` | Leave provider/per-model default | |
|
||||
|
||||
@@ -66,7 +66,7 @@ These commands are available inside the Kit TUI during an interactive session:
|
||||
| `/servers` | Show connected MCP servers |
|
||||
| `/model [name]` | Switch model or open model selector |
|
||||
| `/theme [name]` | Switch color theme or list available themes |
|
||||
| `/thinking [level]` | Set thinking level (off, minimal, low, medium, high) |
|
||||
| `/thinking [level]` | Set thinking level (off, none, minimal, low, medium, high) |
|
||||
| `/compact [focus]` | Summarize older messages to free context |
|
||||
| `/clear` | Clear conversation |
|
||||
| `/clear-queue` | Clear queued messages |
|
||||
|
||||
@@ -59,7 +59,7 @@ These flags control Kit's behavior. When a prompt is passed as a positional argu
|
||||
| `--stop-sequences` | — | — | Custom stop sequences (comma-separated) |
|
||||
| `--frequency-penalty` | — | `0.0` | Penalize frequent tokens (0.0–2.0) |
|
||||
| `--presence-penalty` | — | `0.0` | Penalize present tokens (0.0–2.0) |
|
||||
| `--thinking-level` | — | `off` | Extended thinking level: off, minimal, low, medium, high |
|
||||
| `--thinking-level` | — | `off` | Extended thinking level: off, none, minimal, low, medium, high |
|
||||
|
||||
## System
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ stream: true
|
||||
| `compact` | bool | `false` | Enable compact output mode |
|
||||
| `system-prompt` | string | — | System prompt text or file path |
|
||||
| `max-steps` | int | `0` | Maximum agent steps (0 = unlimited) |
|
||||
| `thinking-level` | string | `off` | Extended thinking: off, minimal, low, medium, high |
|
||||
| `thinking-level` | string | `off` | Extended thinking: off, none, minimal, low, medium, high |
|
||||
| `provider-api-key` | string | — | API key for the provider |
|
||||
| `provider-url` | string | — | Base URL for provider API |
|
||||
| `tls-skip-verify` | bool | `false` | Skip TLS certificate verification |
|
||||
|
||||
@@ -24,7 +24,7 @@ host, err := kit.New(ctx, &kit.Options{
|
||||
|
||||
// Generation parameters (override env/config/per-model defaults)
|
||||
MaxTokens: 16384, // 0 = auto-resolve; non-zero suppresses right-sizing
|
||||
ThinkingLevel: "medium", // "off", "low", "medium", "high"
|
||||
ThinkingLevel: "medium", // "off", "none", "minimal", "low", "medium", "high"
|
||||
Temperature: ptrFloat32(0.2), // pointer so explicit 0.0 != unset
|
||||
TopP: nil, // nil = provider/per-model default
|
||||
TopK: nil,
|
||||
@@ -107,7 +107,7 @@ defaults for samplers).
|
||||
| Field | Type | Default | Description |
|
||||
|-------|------|---------|-------------|
|
||||
| `MaxTokens` | `int` | auto-resolved | Max output tokens per response. `0` = auto-resolve; non-zero suppresses automatic right-sizing (same semantics as `--max-tokens`). |
|
||||
| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"low"`, `"medium"`, `"high"` (some providers also accept `"minimal"`). `""` falls through to config/env/per-model/`"off"`. |
|
||||
| `ThinkingLevel` | `string` | auto-resolved | Reasoning effort: `"off"`, `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`. `""` falls through to config/env/per-model/`"off"`. |
|
||||
| `Temperature` | `*float32` | — | Sampling randomness. Pointer type so explicit `0.0` is distinguishable from "unset". |
|
||||
| `TopP` | `*float32` | — | Nucleus sampling cutoff. `nil` leaves provider/per-model default. |
|
||||
| `TopK` | `*int32` | — | Top-K sampling limit. `nil` leaves provider/per-model default. |
|
||||
|
||||
@@ -115,7 +115,7 @@ entirely in-code via `Options`, without touching `.kit.yml` or `viper.Set()`:
|
||||
host, _ := kit.New(ctx, &kit.Options{
|
||||
Model: "anthropic/claude-sonnet-4-5-20250929",
|
||||
MaxTokens: 16384, // 0 = auto-resolve (env → config → per-model → floor)
|
||||
ThinkingLevel: "high", // "off" | "low" | "medium" | "high"
|
||||
ThinkingLevel: "high", // "off" | "none" | "minimal" | "low" | "medium" | "high"
|
||||
Temperature: ptrFloat32(0.2), // nil = provider/per-model default
|
||||
ProviderAPIKey: os.Getenv("MY_SECRET"), // overrides pre-existing viper state
|
||||
ProviderURL: "https://proxy.internal/v1",
|
||||
|
||||
Reference in New Issue
Block a user